/// Result of splitting a query string into the terms that must match and the
/// terms that must not.
///
/// Both lists keep the terms in the order they appeared in the query.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedQuery {
    /// Terms that were not negated.
    pub positive: Vec<String>,
    /// Terms that were negated, stored without the `NOT` keyword or the
    /// leading `-` that marked them.
    pub negative: Vec<String>,
}
/// Reasons a query string is rejected by `parse_query`.
///
/// The user-facing text for each variant comes from its `#[error(...)]`
/// attribute.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum InvalidQuery {
/// The query yielded at least one negative term but no positive term.
#[error(
"FTS query must contain at least one positive term; \
a NOT-only query (e.g. 'NOT python') is not supported"
)]
NegativeOnly,
/// A parenthesised group was found where a single negated term was
/// expected, e.g. `NOT (python OR ruby)`.
#[error(
"FTS query contains parenthesised NOT groups which are not supported; \
use flat negations instead, e.g. 'rust NOT python NOT ruby' \
rather than 'rust NOT (python OR ruby)'"
)]
ParenthesesNotSupported,
}
/// Splits a whitespace-separated query into positive and negative terms.
///
/// Tokens are classified in order:
/// - `NOT term` adds `term` to `negative`. A trailing `NOT` with nothing
///   after it is ignored.
/// - `-term` adds `term` (without the leading dash) to `negative`.
/// - A bare `-` is kept as a positive term.
/// - Any other token is a positive term.
///
/// `NOT` is matched case-sensitively; `not` is an ordinary positive term.
///
/// # Errors
///
/// - [`InvalidQuery::ParenthesesNotSupported`] if the operand of a negation
///   (either `NOT x` or `-x`) starts with `(`. Grouped negations cannot be
///   flattened into a term list, so they are rejected rather than split into
///   meaningless fragments such as `(python` and `ruby)`.
/// - [`InvalidQuery::NegativeOnly`] if the query contains negative terms but
///   no positive term.
pub fn parse_query(query: &str) -> Result<ParsedQuery, InvalidQuery> {
    let mut positive = Vec::new();
    let mut negative = Vec::new();
    let mut tokens = query.split_whitespace();

    while let Some(tok) = tokens.next() {
        // `Some(term)` when this token introduces a negated term,
        // `None` when the token itself is a positive term.
        let negated = if tok == "NOT" {
            // `NOT` consumes the following token as its operand; a dangling
            // `NOT` at the end of the query is dropped.
            match tokens.next() {
                Some(operand) => Some(operand),
                None => break,
            }
        } else {
            // A lone `-` has an empty remainder and stays a positive term.
            tok.strip_prefix('-').filter(|rest| !rest.is_empty())
        };

        match negated {
            // Both negation spellings share this check so `-(a OR b)` is
            // rejected exactly like `NOT (a OR b)`.
            Some(term) if term.starts_with('(') => {
                return Err(InvalidQuery::ParenthesesNotSupported);
            }
            Some(term) => negative.push(term.to_string()),
            None => positive.push(tok.to_string()),
        }
    }

    if positive.is_empty() && !negative.is_empty() {
        return Err(InvalidQuery::NegativeOnly);
    }
    Ok(ParsedQuery { positive, negative })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns borrowed string slices into the owned form stored in `ParsedQuery`.
    fn owned(terms: &[&str]) -> Vec<String> {
        terms.iter().copied().map(String::from).collect()
    }

    /// Asserts that `query` parses successfully into exactly these term lists.
    fn assert_parsed(query: &str, positive: &[&str], negative: &[&str]) {
        let expected = ParsedQuery {
            positive: owned(positive),
            negative: owned(negative),
        };
        assert_eq!(parse_query(query), Ok(expected));
    }

    /// Asserts that `query` is rejected with exactly this error.
    fn assert_rejected(query: &str, expected: InvalidQuery) {
        assert_eq!(parse_query(query), Err(expected));
    }

    #[test]
    fn simple_positive_terms() {
        assert_parsed("rust python", &["rust", "python"], &[]);
    }

    #[test]
    fn not_keyword() {
        assert_parsed("rust NOT python", &["rust"], &["python"]);
    }

    #[test]
    fn dash_prefix() {
        assert_parsed("rust -python", &["rust"], &["python"]);
    }

    #[test]
    fn multiple_negations() {
        assert_parsed("rust NOT python NOT ruby", &["rust"], &["python", "ruby"]);
    }

    #[test]
    fn multiple_dash_negations() {
        assert_parsed("rust -python -ruby", &["rust"], &["python", "ruby"]);
    }

    #[test]
    fn negative_only_returns_error() {
        assert_rejected("NOT python", InvalidQuery::NegativeOnly);
    }

    #[test]
    fn dash_only_negative_returns_error() {
        assert_rejected("-python", InvalidQuery::NegativeOnly);
    }

    #[test]
    fn parentheses_after_not_returns_error() {
        assert_rejected(
            "rust NOT (python OR ruby)",
            InvalidQuery::ParenthesesNotSupported,
        );
    }

    #[test]
    fn bare_dash_treated_as_positive() {
        // A dash surrounded by whitespace is not a negation marker.
        assert_parsed("hello - world", &["hello", "-", "world"], &[]);
    }

    #[test]
    fn trailing_not_ignored() {
        // `NOT` with no operand contributes nothing to either list.
        assert_parsed("rust NOT", &["rust"], &[]);
    }

    #[test]
    fn no_positive_only_negatives_multiple() {
        assert_rejected("NOT python NOT ruby", InvalidQuery::NegativeOnly);
    }
}