use super::parser::parse_filter;
use super::types::{FilterError, Token};
use std::iter::Peekable;
use std::str::CharIndices;
/// Returns `true` when `s` begins with `keyword`, comparing ASCII letters
/// case-insensitively.
///
/// Only the prefix is inspected: no word-boundary check is made, so
/// `starts_with_keyword("ANDROID", "AND")` is `true`. Non-ASCII characters
/// must match exactly, which means Unicode look-alikes of ASCII letters never
/// match an ASCII keyword. An empty `keyword` matches every `s`.
#[must_use]
pub fn starts_with_keyword(s: &str, keyword: &str) -> bool {
    // Walk the keyword and pull one character of `s` per keyword character.
    // This looks at no more of `s` than the keyword is long, and compares
    // characters with characters (never a char count with a byte length).
    let mut candidate = s.chars();
    keyword.chars().all(|expected| {
        candidate
            .next()
            .is_some_and(|actual| actual.eq_ignore_ascii_case(&expected))
    })
}
/// Reports whether `s` begins with the `AND` keyword, ignoring ASCII case.
///
/// This is a pure prefix test; callers are responsible for checking that the
/// match sits on a word boundary.
#[inline]
fn starts_with_and(s: &str) -> bool {
    const KEYWORD: &str = "AND";
    starts_with_keyword(s, KEYWORD)
}
/// Reports whether `s` begins with the `OR` keyword, ignoring ASCII case.
///
/// This is a pure prefix test; callers are responsible for checking that the
/// match sits on a word boundary.
#[inline]
fn starts_with_or(s: &str) -> bool {
    const KEYWORD: &str = "OR";
    starts_with_keyword(s, KEYWORD)
}
/// Reports whether `s` begins with the `NOT` keyword, ignoring ASCII case.
///
/// This is a pure prefix test; callers are responsible for checking that the
/// match sits on a word boundary.
#[inline]
fn starts_with_not(s: &str) -> bool {
    const KEYWORD: &str = "NOT";
    starts_with_keyword(s, KEYWORD)
}
/// Splits a filter expression into a flat list of [`Token`]s.
///
/// Spaces, tabs, and line breaks between tokens are skipped, `(` and `)`
/// become parenthesis tokens, and a character that could open `AND`, `OR`, or
/// `NOT` is first offered to `try_parse_keyword`. Anything that is not
/// recognised as a keyword is handed to `parse_filter_token` and emitted as a
/// [`Token::Filter`].
///
/// # Errors
///
/// Propagates any [`FilterError`] produced by `parse_filter_token`.
pub fn tokenize(input: &str) -> Result<Vec<Token>, FilterError> {
    let mut tokens = Vec::new();
    let mut chars = input.char_indices().peekable();

    while let Some((pos, current)) = chars.next() {
        let token = match current {
            ' ' | '\t' | '\n' | '\r' => continue,
            '(' => Token::LeftParen,
            ')' => Token::RightParen,
            _ => {
                // Only characters that can begin a keyword are worth the
                // keyword probe; everything else is a filter straight away.
                let keyword = if matches!(current, 'A' | 'a' | 'O' | 'o' | 'N' | 'n') {
                    try_parse_keyword(input, pos, &mut chars)
                } else {
                    None
                };
                match keyword {
                    Some((keyword_token, _)) => keyword_token,
                    None => Token::Filter(parse_filter_token(input, pos, &mut chars)?),
                }
            }
        };
        tokens.push(token);
    }

    Ok(tokens)
}
/// Tries to read a boolean keyword (`AND`, `NOT`, or `OR`, any ASCII case)
/// that begins at byte offset `start_idx` of `input`.
///
/// A keyword is only accepted when it stands alone: the character before it
/// must be absent, whitespace, or `(`, and the character after it must be
/// absent, whitespace, `(`, or `)`.
///
/// `chars` is the caller's cursor over `input`. The caller is expected to have
/// already pulled the character at `start_idx` out of it, so on success this
/// function advances `chars` past the *rest* of the keyword only; the
/// character following the keyword is left for the caller. On failure `chars`
/// is not touched.
///
/// Returns the keyword token together with the keyword's length, or `None`
/// when no stand-alone keyword starts at `start_idx`.
fn try_parse_keyword(
    input: &str,
    start_idx: usize,
    chars: &mut Peekable<CharIndices>,
) -> Option<(Token, usize)> {
    let remaining = &input[start_idx..];
    let (token, keyword_len) = if starts_with_and(remaining) {
        (Token::And, 3)
    } else if starts_with_not(remaining) {
        (Token::Not, 3)
    } else if starts_with_or(remaining) {
        (Token::Or, 2)
    } else {
        return None;
    };

    // `next_back` reads only the single preceding character; an empty prefix
    // (keyword at the very start) yields `None`, which counts as a boundary.
    let before_ok = input[..start_idx]
        .chars()
        .next_back()
        .is_none_or(|c| c.is_whitespace() || c == '(');
    // The matched keyword is pure ASCII, so its length in chars equals
    // `keyword_len`; `None` here means the keyword ends the input.
    let after_ok = remaining
        .chars()
        .nth(keyword_len)
        .is_none_or(|c| c.is_whitespace() || c == '(' || c == ')');

    if !(before_ok && after_ok) {
        return None;
    }

    // The first keyword character was already consumed by the caller, so only
    // `keyword_len - 1` characters remain to be skipped. Skipping a full
    // `keyword_len` would swallow the character after the keyword, e.g. the
    // `(` in `NOT(a=b)`.
    for _ in 1..keyword_len {
        chars.next();
    }
    Some((token, keyword_len))
}
/// Reads one filter condition (for example `package.name="a b"`) starting at
/// byte offset `start_idx` of `input` and returns its trimmed text.
///
/// `chars` is the caller's cursor over `input`; the character at `start_idx`
/// is expected to have been consumed from it already, so it is recovered from
/// `input` directly. The cursor is left on the first character that does not
/// belong to the filter.
///
/// Outside of double quotes the filter ends at `(` or `)`, or at whitespace
/// that is followed by a stand-alone keyword (see `looks_like_keyword_at`).
/// Inside double quotes everything is kept verbatim and a backslash escapes
/// the next character.
///
/// # Errors
///
/// * [`FilterError::UnclosedQuote`] if the input ends inside a quoted section.
/// * [`FilterError::InvalidSyntax`] if the collected text is empty after
///   trimming.
/// * Whatever error `parse_filter` reports for the collected text.
fn parse_filter_token(
    input: &str,
    start_idx: usize,
    chars: &mut Peekable<CharIndices>,
) -> Result<String, FilterError> {
    let mut raw = String::new();
    let mut in_quotes = false;
    let mut escape_next = false;

    if let Some(first) = input[start_idx..].chars().next() {
        raw.push(first);
        in_quotes = first == '"';
    }

    while let Some(&(idx, ch)) = chars.peek() {
        if escape_next {
            // The character after a backslash is taken literally.
            escape_next = false;
        } else if in_quotes {
            match ch {
                '\\' => escape_next = true,
                '"' => in_quotes = false,
                _ => {}
            }
        } else if ch == '"' {
            in_quotes = true;
        } else if ch == '(' || ch == ')' {
            break;
        } else if ch.is_whitespace() {
            if looks_like_keyword_at(input, idx) {
                break;
            }
        } else if ch.is_alphabetic()
            && raw.ends_with(char::is_whitespace)
            && looks_like_keyword_at(input, idx)
        {
            // A keyword only terminates the filter when it starts a new word.
            // Without the boundary check, values that merely end in a keyword
            // spelling (`name=band`, `category=editor`) would be cut in two.
            break;
        }

        raw.push(ch);
        chars.next();
    }

    if in_quotes {
        return Err(FilterError::UnclosedQuote(format!(
            "Unclosed quote in filter starting at position {start_idx}"
        )));
    }

    let filter_str = raw.trim();
    if filter_str.is_empty() {
        return Err(FilterError::InvalidSyntax(
            "Empty filter condition".to_string(),
        ));
    }

    // Validate eagerly so malformed filters are reported during tokenization;
    // the parsed value itself is not needed here.
    parse_filter(filter_str)?;
    Ok(filter_str.to_string())
}
/// Reports whether the text at byte offset `pos` of `input`, after skipping
/// leading whitespace, is a boolean keyword (`AND`, `NOT`, `OR`) that is
/// immediately followed by whitespace, `(`, `)`, or the end of the input.
///
/// Only the character *after* the keyword is checked; what precedes `pos` is
/// not examined.
fn looks_like_keyword_at(input: &str, pos: usize) -> bool {
    let candidate = input[pos..].trim_start();

    let keyword_len = if starts_with_and(candidate) || starts_with_not(candidate) {
        3
    } else if starts_with_or(candidate) {
        2
    } else {
        // Also covers the empty remainder: nothing starts with a keyword.
        return false;
    };

    match candidate.chars().nth(keyword_len) {
        // The keyword runs to the end of the input.
        None => true,
        Some(next) => next.is_whitespace() || matches!(next, '(' | ')'),
    }
}
/// Unit tests for the tokenizer and its keyword-prefix helpers.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_tokenize_simple_filter() {
        let tokens = tokenize("publish=false").unwrap();
        assert_eq!(tokens, vec![Token::Filter("publish=false".to_string())]);
    }

    #[test]
    fn test_tokenize_simple_filter_with_nested_path() {
        let tokens = tokenize("package.publish=false").unwrap();
        assert_eq!(
            tokens,
            vec![Token::Filter("package.publish=false".to_string())]
        );
    }

    #[test]
    fn test_tokenize_and_expression() {
        let tokens = tokenize("package.publish=false AND package.version^=0.1").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Filter("package.publish=false".to_string()),
                Token::And,
                Token::Filter("package.version^=0.1".to_string()),
            ]
        );
    }

    #[test]
    fn test_tokenize_or_expression() {
        let tokens = tokenize("package.publish=false OR package.version^=0.1").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Filter("package.publish=false".to_string()),
                Token::Or,
                Token::Filter("package.version^=0.1".to_string()),
            ]
        );
    }

    #[test]
    fn test_tokenize_not_expression() {
        let tokens = tokenize("NOT package.publish=false").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Not,
                Token::Filter("package.publish=false".to_string()),
            ]
        );
    }

    #[test]
    fn test_tokenize_parentheses() {
        let tokens = tokenize("(package.publish=false OR package.version^=0.1)").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::LeftParen,
                Token::Filter("package.publish=false".to_string()),
                Token::Or,
                Token::Filter("package.version^=0.1".to_string()),
                Token::RightParen,
            ]
        );
    }

    #[test]
    fn test_tokenize_complex_expression() {
        let tokens = tokenize(
            "(package.publish=false OR package.name$=_example) AND package.categories@=audio",
        )
        .unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::LeftParen,
                Token::Filter("package.publish=false".to_string()),
                Token::Or,
                Token::Filter("package.name$=_example".to_string()),
                Token::RightParen,
                Token::And,
                Token::Filter("package.categories@=audio".to_string()),
            ]
        );
    }

    #[test]
    fn test_tokenize_quoted_value() {
        let tokens = tokenize(r#"name="test package""#).unwrap();
        assert_eq!(
            tokens,
            vec![Token::Filter(r#"name="test package""#.to_string())]
        );
    }

    #[test]
    fn test_tokenize_quoted_with_keyword() {
        let tokens = tokenize(r#"description="This AND that""#).unwrap();
        assert_eq!(
            tokens,
            vec![Token::Filter(r#"description="This AND that""#.to_string())]
        );
    }

    #[test]
    fn test_tokenize_quoted_with_escaped_quote() {
        let tokens = tokenize(r#"title="Quote: \"test\"""#).unwrap();
        assert_eq!(
            tokens,
            vec![Token::Filter(r#"title="Quote: \"test\"""#.to_string())]
        );
    }

    #[test]
    fn test_tokenize_case_insensitive_keywords() {
        let tokens =
            tokenize("package.publish=false and package.version^=0.1 OR package.name=test")
                .unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Filter("package.publish=false".to_string()),
                Token::And,
                Token::Filter("package.version^=0.1".to_string()),
                Token::Or,
                Token::Filter("package.name=test".to_string()),
            ]
        );
    }

    #[test]
    fn test_tokenize_keyword_in_value() {
        let tokens = tokenize("brand=ANDROID").unwrap();
        assert_eq!(tokens, vec![Token::Filter("brand=ANDROID".to_string())]);
    }

    #[test]
    fn test_tokenize_unclosed_quote() {
        let result = tokenize(r#"name="unclosed"#);
        assert!(matches!(result, Err(FilterError::UnclosedQuote(_))));
    }

    #[test]
    fn test_tokenize_standalone_keyword() {
        use super::super::expression_parser::parse_expression;
        let tokens = tokenize("AND").unwrap();
        assert_eq!(tokens, vec![Token::And]);
        let result = parse_expression("AND");
        assert!(matches!(result, Err(FilterError::UnexpectedToken(_))));
    }

    #[test]
    fn test_starts_with_keyword_generic() {
        assert!(starts_with_keyword("AND", "AND"));
        assert!(starts_with_keyword("and", "AND"));
        assert!(starts_with_keyword("AnD", "AND"));
        // Prefix match only: no word-boundary check at this level.
        assert!(starts_with_keyword("ANDROID", "AND"));
        // Too short to contain the keyword.
        assert!(!starts_with_keyword("AN", "AND"));
        assert!(!starts_with_keyword("", "AND"));
        assert!(!starts_with_keyword("OR", "AND"));
        assert!(starts_with_keyword("OR", "OR"));
        assert!(starts_with_keyword("NOT", "NOT"));
        // Full-width look-alike letters (U+FF21 U+FF2E U+FF24) are not the
        // ASCII keyword. Written as escapes so the distinction survives
        // Unicode normalization of the source text.
        assert!(!starts_with_keyword("\u{FF21}\u{FF2E}\u{FF24}", "AND"));
        assert!(!starts_with_keyword("名前", "AND"));
    }

    #[test]
    fn test_starts_with_and() {
        assert!(starts_with_and("AND"));
        assert!(starts_with_and("AND "));
        assert!(starts_with_and("and"));
        assert!(starts_with_and("AnD"));
        assert!(starts_with_and("AND rest of string"));
        // Too short.
        assert!(!starts_with_and("AN"));
        // Prefix match only: boundary checks happen in the callers.
        assert!(starts_with_and("ANDROID"));
        assert!(!starts_with_and("OR"));
        assert!(!starts_with_and(""));
        assert!(!starts_with_and("名前"));
    }

    #[test]
    fn test_starts_with_or() {
        assert!(starts_with_or("OR"));
        assert!(starts_with_or("or"));
        assert!(starts_with_or("Or"));
        assert!(starts_with_or("OR more"));
        // Too short.
        assert!(!starts_with_or("O"));
        assert!(!starts_with_or("AND"));
        assert!(!starts_with_or(""));
    }

    #[test]
    fn test_starts_with_not() {
        assert!(starts_with_not("NOT"));
        assert!(starts_with_not("not"));
        assert!(starts_with_not("NoT"));
        assert!(starts_with_not("NOT more"));
        // Too short.
        assert!(!starts_with_not("NO"));
        assert!(!starts_with_not("AND"));
        assert!(!starts_with_not(""));
    }

    #[test]
    fn test_unicode_does_not_match_keywords() {
        // Full-width forms of "AND", "OR", and "NOT": visually similar to the
        // ASCII keywords but different code points, so they must not match.
        assert!(!starts_with_and("\u{FF21}\u{FF2E}\u{FF24}"));
        assert!(!starts_with_or("\u{FF2F}\u{FF32}"));
        assert!(!starts_with_not("\u{FF2E}\u{FF2F}\u{FF34}"));
        assert!(!starts_with_and("名前"));
        assert!(!starts_with_or("おはよう"));
    }
}