use std::{
fmt::{self, Debug, Display},
path::{MAIN_SEPARATOR, Path},
};
use crate::{
evaluator::{self, Result},
lexer::{self, Token, TokenStream},
};
/// A single ignore pattern compiled to a byte-oriented regular expression.
///
/// A glob built with [`Glob::empty`] carries neither a regex nor a pattern;
/// [`Glob::is_ignored`] returns `None` for it on every path.
#[derive(Debug)]
pub struct Glob {
/// Compiled matcher; `None` for an empty glob.
regex: Option<regex::bytes::Regex>,
/// Pattern text kept for `Display` and log output; `None` for an empty glob.
pattern: Option<String>,
/// When `true`, a matching path is reported as *not* ignored.
is_negated: bool,
}
impl Display for Glob {
    /// Writes the stored pattern text, or the literal `None` when the glob is empty.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.pattern.as_deref().unwrap_or("None");
        f.write_str(text)
    }
}
impl Glob {
    /// Builds a glob from an already compiled `regex`.
    ///
    /// `pattern` is the source text retained for display and logging, and `is_negated`
    /// flips the verdict reported by [`Glob::is_ignored`] when the regex matches.
    pub(crate) fn new(regex: regex::bytes::Regex, pattern: &str, is_negated: bool) -> Self {
        Self {
            regex: Some(regex),
            pattern: Some(pattern.into()),
            is_negated,
        }
    }

    /// Builds a glob with no regex and no pattern; it never yields a verdict.
    pub(crate) const fn empty() -> Self {
        Self {
            regex: None,
            pattern: None,
            is_negated: false,
        }
    }

    /// Returns `true` when this glob has no compiled regex.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.regex.is_none()
    }

    /// Tests `path` against this glob.
    ///
    /// Returns `None` when the glob is empty or the path does not match, `Some(true)`
    /// when the path matches a regular pattern, and `Some(false)` when it matches a
    /// negated pattern.
    #[must_use]
    pub fn is_ignored(&self, path: impl AsRef<Path>) -> Option<bool> {
        let regex = self.regex.as_ref()?;
        let path = path.as_ref();

        // The regex is byte-oriented, so match on the path's raw encoded bytes. Going
        // through `to_str()` would collapse every non-UTF-8 path to the empty string and
        // make such paths impossible to match by name. For valid UTF-8 paths the bytes
        // are identical to `to_str().as_bytes()`.
        if !regex.is_match(path.as_os_str().as_encoded_bytes()) {
            log::trace!(
                "{} did not match {:?} (via regular expression: {regex})",
                path.display(),
                self.pattern.as_ref(),
            );
            return None;
        }

        let is_ignored = !self.is_negated;
        log::debug!(
            "{} matched {:?} (via regular expression: {regex}). Is ignored: {}",
            path.display(),
            self.pattern.as_ref(),
            is_ignored
        );
        Some(is_ignored)
    }
}
impl TryFrom<TokenStream> for Glob {
    type Error = evaluator::Error;

    /// Compiles a lexed ignore pattern into a [`Glob`] backed by a byte regex.
    ///
    /// An empty stream, or a stream holding only a comment, yields [`Glob::empty`].
    /// A leading negation token marks the glob as negated. A directory separator that
    /// is followed by further tokens anchors the expression at the start of the path;
    /// a separator as the very last token makes the pattern directory-only (no trailing
    /// `(?:$|[\\/])` boundary is appended). Both `/` and `\` are accepted as separators
    /// in the generated expression.
    ///
    /// # Errors
    ///
    /// * [`evaluator::Error::InvalidPattern`] when a comment or negation token is
    ///   encountered after the optional leading negation.
    /// * [`evaluator::Error::InvalidRegex`] when the regex builder rejects the generated
    ///   expression (for example because it exceeds the configured size limit).
    fn try_from(value: TokenStream) -> Result<Self> {
        if value.is_empty() {
            return Ok(Self::empty());
        }
        if value.len() == 1 && matches!(value.first(), Some(Token::Comment(_))) {
            return Ok(Self::empty());
        }

        let mut expression = String::new();
        let mut tokens = value.iter().peekable();
        let is_negated = tokens.next_if(|token| *token == &Token::Negation).is_some();
        let mut is_relative_to_root = false;
        let mut is_directory_only = false;

        while let Some(token) = tokens.next() {
            if *token == Token::DirectorySeparator {
                if tokens.peek().is_some() {
                    is_relative_to_root = true;
                    // A separator at the very start only anchors the pattern; it emits nothing.
                    if expression.is_empty() {
                        continue;
                    }
                } else {
                    is_directory_only = true;
                }
            }

            match token {
                Token::ExplicitLiteral(_) => {
                    expression.push_str(regex::escape(token.as_str()).as_str());
                }
                Token::ImplicitLiteral(_) => {
                    // Unescaped trailing whitespace at the end of the pattern is dropped.
                    let literal = if tokens.peek().is_none() {
                        token.as_str().trim_end()
                    } else {
                        token.as_str()
                    };
                    expression.push_str(regex::escape(literal).as_str());
                }
                Token::Range(_) => {
                    // The range body is passed through verbatim as a regex character class.
                    expression.push('[');
                    expression.push_str(token.as_str());
                    expression.push(']');
                }
                Token::Comment(_) | Token::Negation => {
                    return Err(evaluator::Error::InvalidPattern {
                        pattern: value.into(),
                        source: None,
                    });
                }
                Token::Asterisk => {
                    // One or more bytes within a single path segment.
                    expression.push_str(r"[^\\/]+");
                }
                Token::DoubleAsterisk => {
                    // Emptiness must be observed BEFORE anything is pushed; testing it after
                    // pushing `.*` made the leading-`**/` case unreachable.
                    let is_leading = expression.is_empty();
                    if is_leading
                        && tokens
                            .next_if(|next| *next == &Token::DirectorySeparator)
                            .is_some()
                    {
                        if tokens.peek().is_none() {
                            // Bare `**/`: keep the historical compilation (any path that
                            // contains a separator, directory-only).
                            is_directory_only = true;
                            expression.push_str(r".*[\\/]");
                        } else {
                            // Leading `**/` means "in any directory, including the root",
                            // so the directory prefix has to be optional.
                            is_relative_to_root = true;
                            expression.push_str(r"(?:.*[\\/])?");
                        }
                    } else {
                        expression.push_str(r".*");
                    }
                }
                Token::DirectorySeparator
                    if tokens
                        .next_if(|next| *next == &Token::DoubleAsterisk)
                        .is_some() =>
                {
                    // Trailing `/**`: everything below the directory.
                    if tokens.peek().is_none() {
                        expression.push_str(r"[\\/].*");
                        break;
                    }
                    if tokens
                        .next_if(|next| *next == &Token::DirectorySeparator)
                        .is_some()
                    {
                        // `/**/`: zero or more intermediate directories.
                        expression.push_str(r"[\\/]([^\\/]+[\\/])*");
                    } else {
                        // `/**` glued to another token: both tokens were already consumed,
                        // so emit them instead of silently dropping them.
                        expression.push_str(r"[\\/].*");
                    }
                }
                Token::DirectorySeparator => {
                    expression.push_str(r"[\\/]");
                }
                Token::QuestionMark => {
                    // Exactly one byte within a path segment.
                    expression.push_str(r"[^\\/]");
                }
            }
        }

        // Anchored patterns match from the start of the path; others may start at any
        // segment boundary.
        if is_relative_to_root {
            expression.insert(0, '^');
        } else {
            expression.insert_str(0, r"(?:^|[\\/])");
        }
        // Unless the pattern ended in a separator, it must end on a segment boundary.
        if !is_directory_only {
            expression.push_str(r"(?:$|[\\/])");
        }

        let compiled = regex::bytes::RegexBuilder::new(expression.as_str())
            .size_limit(20_000)
            .unicode(false)
            .build()
            .map_err(|e| evaluator::Error::InvalidRegex {
                pattern: value.clone().into(),
                regex: expression.as_str().into(),
                source: e,
            })?;

        // Render the pattern text once; it feeds both the log line and the stored pattern.
        let pattern = String::from(value);
        log::trace!(
            "Converted pattern: {:?} into regex: {:?} (with negation: {})",
            pattern,
            compiled,
            is_negated
        );
        Ok(Self::new(compiled, pattern.as_str(), is_negated))
    }
}
impl From<TokenStream> for String {
    /// Renders a token stream back into pattern text.
    ///
    /// Explicit literals regain their leading backslash, ranges are wrapped in square
    /// brackets, comments regain their leading `#`, and every other token contributes
    /// its `as_str()` text unchanged.
    fn from(value: TokenStream) -> Self {
        value.iter().fold(Self::new(), |mut rendered, token| {
            let (prefix, suffix) = match token {
                Token::ExplicitLiteral(_) => ("\\", ""),
                Token::Range(_) => ("[", "]"),
                Token::Comment(_) => ("#", ""),
                _ => ("", ""),
            };
            rendered.push_str(prefix);
            rendered.push_str(token.as_str());
            rendered.push_str(suffix);
            rendered
        })
    }
}
impl TryFrom<&str> for Glob {
type Error = evaluator::Error;
fn try_from(value: &str) -> evaluator::Result<Self> {
if value.is_empty() {
return Ok(Self::empty());
}
let tokens = lexer::analyse(value).map_err(|e| evaluator::Error::InvalidPattern {
pattern: value.into(),
source: Some(e),
})?;
Self::try_from(tokens)
}
}
// Unit tests for building `Glob` values from pattern text.
#[cfg(test)]
mod tests {
use insta::assert_snapshot;
use proptest::prelude::*;
use rstest::{Context, rstest};
use crate::utils;
/// An empty line and a comment-only line must both build an empty glob.
#[rstest]
#[case(r"")]
#[case(r"# This is a comment")]
pub fn test_empty_globs(#[case] pattern: &str) {
let output = super::Glob::try_from(pattern)
.expect("Should never fail to build glob from empty or comment pattern");
assert!(output.is_empty());
}
/// Checks whether building succeeds or fails as expected: a pattern ending in a lone
/// backslash is expected to be rejected, while a backslash followed by a space is accepted.
#[rstest]
#[case(r"foo\", false)]
#[case(r"foo\ ", true)]
pub fn test_valid_vs_invalid_patterns(#[case] pattern: &str, #[case] expect_valid: bool) {
let output = super::Glob::try_from(pattern);
assert_eq!(output.is_ok(), expect_valid);
}
/// Pins the regular expression generated for each pattern against a stored snapshot.
///
/// The snapshot name is built from the rstest context's `name` and `case` values, so
/// renaming this function or reordering the cases presumably changes which snapshot
/// is consulted.
#[rstest]
#[case(r"build/")]
#[case(r"tmp/")]
#[case(r"vendor/")]
#[case(r"!vendor/keep.me")]
#[case(r"*.tmp")]
#[case(r"*.log")]
#[case(r"**/globfoo.txt")]
#[case(r"globdir/**")]
#[case(r"a/**/globbar.txt")]
#[case(r"/anchored.txt")]
#[case(r"dironly/")]
#[case(r"literal/file\*.txt")]
#[case(r"literal/file\?.txt")]
#[case(r"literal/file\[abc\].txt")]
#[case(r"precedence.log")]
#[case(r"!important.log")]
#[case(r"pruned/")]
#[case(r"!pruned/deep/keep.txt")]
#[case(r"double_negation/important.tmp")]
#[case(r"foo")]
#[case(r"file?.txt")]
#[case(r"file[abc].log")]
#[case(r"file[0-9].txt")]
pub fn test_glob_regexes_match_snapshot(#[context] ctx: Context, #[case] pattern: &str) {
let output =
super::Glob::try_from(pattern).expect("Should never fail to build glob pattern");
assert_snapshot!(
format!(
"{}_{}",
ctx.name,
ctx.case.expect("to provide description for test case")
),
// An empty glob has no regex and is snapshotted as the empty string.
output.regex.map(|r| r.to_string()).unwrap_or_default()
);
}
proptest! {
// Property test: every pattern produced by the fuzzing strategy must build into a
// glob successfully (the assertion is on `is_ok()`, not merely on the absence of panics).
#[test]
fn test_building_never_panics(
pattern in utils::get_gitignore_pattern_fuzzing_strategy()
) {
let output = super::Glob::try_from(pattern.as_str());
prop_assert!(output.is_ok(), "Failed to build glob from pattern: {:?}", pattern);
}
}
}