use std::ops::Deref;
use crate::lexer;
mod error;
mod types;
pub use error::Error;
pub use types::Result;
/// Ordered sequence of [`Token`]s, as produced by [`analyse`].
///
/// This is a newtype around `Vec<Token>`; the inner vector is private to this
/// module and is reached read-only through the `Deref` implementation.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct TokenStream(Vec<Token>);
impl Deref for TokenStream {
type Target = Vec<Token>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// A single lexical unit of a pattern, as emitted by [`analyse`].
///
/// Variants that carry data store raw bytes rather than a `String`, because
/// the lexer walks the pattern byte by byte.
#[derive(Debug, PartialEq, Eq, Clone)]
#[non_exhaustive]
pub enum Token {
/// A `!`; `analyse` only emits this when the `!` is the very first byte of the pattern.
Negation,
/// Bytes taken verbatim after a `\` escape. The backslash itself is not stored.
ExplicitLiteral(Vec<u8>),
/// A run of consecutive bytes that matched no other rule.
ImplicitLiteral(Vec<u8>),
/// A `/`.
DirectorySeparator,
/// A single `*`.
Asterisk,
/// Two adjacent `*` bytes, i.e. `**`.
DoubleAsterisk,
/// A `?`.
QuestionMark,
/// The bytes found between a `[` and the next `]`. The brackets are not stored.
Range(Vec<u8>),
/// Every byte that follows a `#` at the very start of the pattern. The `#` is not stored.
Comment(Vec<u8>),
}
impl Token {
    /// Returns the textual form of this token.
    ///
    /// Fixed tokens map to their lexeme (`!`, `/`, `*`, `**`, `?`). Tokens that
    /// carry bytes return that payload decoded as UTF-8, without any
    /// surrounding syntax (no backslash, brackets or `#`). A payload that is
    /// not valid UTF-8 yields the empty string.
    #[must_use]
    pub fn as_str(&self) -> &str {
        // Fixed lexemes return immediately; every byte-carrying variant falls
        // through to the single UTF-8 conversion below.
        let payload: &[u8] = match self {
            Self::Negation => return "!",
            Self::DirectorySeparator => return "/",
            Self::Asterisk => return "*",
            Self::DoubleAsterisk => return "**",
            Self::QuestionMark => return "?",
            Self::ExplicitLiteral(bytes)
            | Self::ImplicitLiteral(bytes)
            | Self::Range(bytes)
            | Self::Comment(bytes) => bytes.as_slice(),
        };

        std::str::from_utf8(payload).unwrap_or_default()
    }
}
/// Splits `pattern` into a [`TokenStream`], scanning it byte by byte.
///
/// Rules, in order of precedence:
/// - `\` escapes the character that follows it, which becomes a
///   [`Token::ExplicitLiteral`]. All UTF-8 bytes of that character are kept
///   together in the one token.
/// - `#` as the very first byte turns the rest of the pattern into a
///   [`Token::Comment`].
/// - `!` as the very first byte becomes [`Token::Negation`].
/// - `/`, `?`, `*` and `**` map to their dedicated tokens; `**` is matched
///   greedily, so `***` lexes as `**` followed by `*`.
/// - `[` starts a [`Token::Range`] holding every byte up to the next `]`.
/// - Any other byte is appended to the current [`Token::ImplicitLiteral`], or
///   starts a new one.
///
/// # Errors
///
/// Returns `lexer::Error::InvalidPattern` when the pattern ends directly after
/// a `\`, or when a `[` is never closed by a `]`.
pub fn analyse(pattern: &str) -> lexer::Result<TokenStream> {
    // UTF-8 continuation bytes all look like 0b10xx_xxxx.
    const CONTINUATION_MASK: u8 = 0b1100_0000;
    const CONTINUATION_TAG: u8 = 0b1000_0000;

    let mut tokens = Vec::<Token>::new();
    let mut bytes = pattern.bytes().peekable();

    while let Some(byte) = bytes.next() {
        match byte {
            b'\\' => {
                let Some(lead) = bytes.next() else {
                    return Err(lexer::Error::InvalidPattern(pattern.into()));
                };
                // `pattern` is valid UTF-8, so any continuation bytes directly
                // after `lead` belong to the escaped character. Keeping them in
                // the same token stops a multi-byte character from being torn
                // into two invalid UTF-8 fragments.
                let mut escaped = vec![lead];
                while let Some(continuation) =
                    bytes.next_if(|b| (*b & CONTINUATION_MASK) == CONTINUATION_TAG)
                {
                    escaped.push(continuation);
                }
                tokens.push(Token::ExplicitLiteral(escaped));
            }
            // Every consumed byte yields a token, so an empty token list means
            // this is the first byte of the pattern.
            b'#' if tokens.is_empty() => {
                tokens.push(Token::Comment(bytes.by_ref().collect()));
                break;
            }
            b'!' if tokens.is_empty() => tokens.push(Token::Negation),
            b'/' => tokens.push(Token::DirectorySeparator),
            b'?' => tokens.push(Token::QuestionMark),
            b'*' => {
                let token = if bytes.next_if_eq(&b'*').is_some() {
                    Token::DoubleAsterisk
                } else {
                    Token::Asterisk
                };
                tokens.push(token);
            }
            b'[' => {
                let mut range = Vec::<u8>::new();
                loop {
                    let Some(next) = bytes.next() else {
                        // Ran out of input before the closing bracket.
                        return Err(lexer::Error::InvalidPattern(pattern.into()));
                    };
                    if next == b']' {
                        break;
                    }
                    range.push(next);
                }
                tokens.push(Token::Range(range));
            }
            other => {
                // Grow the trailing implicit literal when there is one, so a run
                // of ordinary bytes ends up in a single token.
                if let Some(Token::ImplicitLiteral(run)) = tokens.last_mut() {
                    run.push(other);
                } else {
                    tokens.push(Token::ImplicitLiteral(vec![other]));
                }
            }
        }
    }

    Ok(TokenStream(tokens))
}
#[cfg(test)]
mod tests {
    use proptest::prelude::*;
    use rstest::rstest;

    use crate::utils;

    use super::*;

    /// Table-driven check of `analyse`: each case pairs a pattern with the
    /// exact token stream (or error) it must produce.
    ///
    /// For successful cases the stream is also converted back to a `String`
    /// and compared with the input. NOTE(review): this relies on a
    /// `From<TokenStream> for String` impl that is defined outside this
    /// section of the file.
    #[rstest]
    #[case(
        r"hello",
        Ok(TokenStream(vec![
            Token::ImplicitLiteral(b"hello".to_vec()),
        ])),
    )]
    #[case(
        r"hello\world",
        Ok(TokenStream(vec![
            Token::ImplicitLiteral(b"hello".to_vec()),
            Token::ExplicitLiteral(vec![b'w']),
            Token::ImplicitLiteral(b"orld".to_vec()),
        ])),
    )]
    #[case(
        r"hello\",
        Err(lexer::Error::InvalidPattern(r"hello\".into())),
    )]
    #[case(
        r"!foo",
        Ok(TokenStream(vec![
            Token::Negation,
            Token::ImplicitLiteral(b"foo".to_vec()),
        ])),
    )]
    #[case(
        r"\!foo",
        Ok(TokenStream(vec![
            Token::ExplicitLiteral(vec![b'!']),
            Token::ImplicitLiteral(b"foo".to_vec()),
        ])),
    )]
    #[case(
        r"fo!o",
        Ok(TokenStream(vec![
            Token::ImplicitLiteral(b"fo!o".to_vec()),
        ])),
    )]
    #[case(
        r"!fo!o",
        Ok(TokenStream(vec![
            Token::Negation,
            Token::ImplicitLiteral(b"fo!o".to_vec()),
        ])),
    )]
    #[case(
        r"hello/world/",
        Ok(TokenStream(vec![
            Token::ImplicitLiteral(b"hello".to_vec()),
            Token::DirectorySeparator,
            Token::ImplicitLiteral(b"world".to_vec()),
            Token::DirectorySeparator,
        ])),
    )]
    #[case(
        r"hello/?/world/",
        Ok(TokenStream(vec![
            Token::ImplicitLiteral(b"hello".to_vec()),
            Token::DirectorySeparator,
            Token::QuestionMark,
            Token::DirectorySeparator,
            Token::ImplicitLiteral(b"world".to_vec()),
            Token::DirectorySeparator,
        ])),
    )]
    #[case(
        r"**",
        Ok(TokenStream(vec![
            Token::DoubleAsterisk,
        ])),
    )]
    #[case(
        r"/foo/**/*bar",
        Ok(TokenStream(vec![
            Token::DirectorySeparator,
            Token::ImplicitLiteral(b"foo".to_vec()),
            Token::DirectorySeparator,
            Token::DoubleAsterisk,
            Token::DirectorySeparator,
            Token::Asterisk,
            Token::ImplicitLiteral(b"bar".to_vec()),
        ])),
    )]
    #[case(
        r"/*****",
        Ok(TokenStream(vec![
            Token::DirectorySeparator,
            Token::DoubleAsterisk,
            Token::DoubleAsterisk,
            Token::Asterisk,
        ])),
    )]
    #[case(
        r"hello/[a-zA-Z0-9]/world",
        Ok(TokenStream(vec![
            Token::ImplicitLiteral(b"hello".to_vec()),
            Token::DirectorySeparator,
            Token::Range(b"a-zA-Z0-9".to_vec()),
            Token::DirectorySeparator,
            Token::ImplicitLiteral(b"world".to_vec()),
        ])),
    )]
    #[case(
        r"hello/[a-/world",
        Err(lexer::Error::InvalidPattern(r"hello/[a-/world".into())),
    )]
    #[case(
        r"/hello/[a-zA-Z0-9]/world/",
        Ok(TokenStream(vec![
            Token::DirectorySeparator,
            Token::ImplicitLiteral(b"hello".to_vec()),
            Token::DirectorySeparator,
            Token::Range(b"a-zA-Z0-9".to_vec()),
            Token::DirectorySeparator,
            Token::ImplicitLiteral(b"world".to_vec()),
            Token::DirectorySeparator,
        ])),
    )]
    #[case(
        r"# Hello World",
        Ok(TokenStream(vec![
            Token::Comment(b" Hello World".to_vec()),
        ])),
    )]
    #[case(
        r"!deep_keep.log",
        Ok(TokenStream(vec![
            Token::Negation,
            Token::ImplicitLiteral(b"deep_keep.log".to_vec()),
        ])),
    )]
    #[case(
        r"foo ",
        Ok(TokenStream(vec![
            Token::ImplicitLiteral(vec![b'f', b'o', b'o', b' ']),
        ])),
    )]
    #[case(
        r"foo \ ",
        Ok(TokenStream(vec![
            Token::ImplicitLiteral(vec![b'f', b'o', b'o', b' ']),
            Token::ExplicitLiteral(vec![b' ']),
        ])),
    )]
    #[case(
        r"",
        Ok(TokenStream(vec![])),
    )]
    // Whitespace-only pattern: three spaces in, three space bytes out.
    #[case(
        r"   ",
        Ok(TokenStream(vec![
            Token::ImplicitLiteral(vec![b' ', b' ', b' ']),
        ])),
    )]
    // `é` is U+00E9, which UTF-8 encodes as the two bytes 0xC3 0xA9.
    #[case(
        r"fileé.txt",
        Ok(TokenStream(vec![
            Token::ImplicitLiteral(vec![
                b'f', b'i', b'l', b'e', 0xc3, 0xa9, b'.', b't', b'x', b't',
            ]),
        ])),
    )]
    pub fn test_lexing(#[case] pattern: &str, #[case] expected_output: Result<TokenStream>) {
        let output = super::analyse(pattern);
        assert_eq!(output, expected_output);

        // A successfully lexed pattern must survive a round trip back to text.
        if let Ok(token_stream) = output {
            assert_eq!(String::from(token_stream), pattern.to_string());
        }
    }

    proptest! {
        /// Feeds generated patterns to the lexer and requires each one to lex
        /// successfully (which also means the lexer did not panic).
        #[test]
        fn test_lexing_never_panics(
            pattern in utils::get_gitignore_pattern_fuzzing_strategy()
        ) {
            let output = super::analyse(&pattern);
            prop_assert!(output.is_ok(), "Failed to lex pattern: {:?}", pattern);
        }
    }
}