use crate::parser::query;
use itertools::Itertools;
use std::fmt;
use winnow::{Parser, Result};
/// A query: an ordered sequence of top-level [`Term`]s.
///
/// The terms borrow their text from a source string that lives for `'src`.
/// The `Display` impl renders the terms separated by single spaces.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Query<'src>(pub Vec<Term<'src>>);
/// One node of a [`Query`] tree.
///
/// Leaf variants borrow their text from the source string; the remaining
/// variants wrap one or more child terms. The rendering noted on each variant
/// is what the `Display` impl for `Term` produces.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum Term<'src> {
/// Plain text, rendered exactly as stored.
Bare(&'src str),
/// Text rendered with a trailing `*`.
Wildcard(&'src str),
/// Text rendered inside double quotes.
Phrase(&'src str),
/// Text rendered inside single quotes.
Literal(&'src str),
/// A single child term rendered with a leading `!`.
Not(Box<Term<'src>>),
/// Child terms rendered with a leading `~` (parenthesised unless there is
/// exactly one child).
// NOTE(review): presumably "optional / should match" clauses — confirm against the parser.
Should(Vec<Term<'src>>),
/// Child terms rendered with a leading `+` (parenthesised unless there is
/// exactly one child).
// NOTE(review): presumably "required / must match" clauses — confirm against the parser.
Must(Vec<Term<'src>>),
/// A `u8` (called `slop` by the `Display` impl) plus child terms, rendered
/// as `n/<slop>(...)`.
// NOTE(review): presumably an unordered proximity operator — confirm.
Near(u8, Vec<Term<'src>>),
/// A `u8` (called `slop` by the `Display` impl) plus child terms, rendered
/// as `w/<slop>(...)`.
// NOTE(review): presumably an ordered proximity operator — confirm.
Within(u8, Vec<Term<'src>>),
/// Child terms rendered inside a bare pair of parentheses.
Grouped(Vec<Term<'src>>),
}
impl<'src> Query<'src> {
pub fn parse(input: &mut &'src str) -> Result<Query<'src>> {
query.parse_next(input)
}
#[must_use]
pub fn simplify(&self) -> Query<'src> {
let terms = self.0.clone();
if terms.iter().all(|t| matches!(t, Term::Should(_))) {
if terms.len() == 1 {
if let Term::Should(ts) = &terms[0]
&& ts.len() == 1
{
return Query(ts.clone());
}
return self.to_owned();
}
return Query(vec![Term::Should(
terms
.iter()
.map(Term::lift)
.map(Term::simplify)
.unique()
.collect(),
)]);
}
let simplified: Vec<_> = terms.into_iter().map(Term::simplify).unique().collect();
if simplified.len() == 1 {
return match &simplified[0] {
Term::Grouped(ts) => Query(ts.iter().cloned().unique().collect()).simplify(),
Term::Should(_) => Query(simplified).simplify(),
_ => Query(simplified),
};
}
Query(simplified)
}
}
impl<'src> Term<'src> {
    /// Collapses `Must` and `Grouped` wrappers.
    ///
    /// * A `Must` or `Grouped` with exactly one child is replaced by that
    ///   child, simplified recursively.
    /// * A `Must` or `Grouped` with any other number of children becomes a
    ///   `Grouped` whose children have each been simplified.
    /// * Every other variant is returned unchanged; their children are not
    ///   visited.
    fn simplify(self) -> Term<'src> {
        match self {
            Term::Must(mut ts) | Term::Grouped(mut ts) => {
                if ts.len() == 1 {
                    // The vector is owned, so move the only child out instead
                    // of cloning it. The index is in bounds because the
                    // length was just checked.
                    ts.swap_remove(0).simplify()
                } else {
                    Term::Grouped(ts.into_iter().map(Term::simplify).collect())
                }
            }
            // `self` is already owned here; hand it back without copying.
            other => other,
        }
    }

    /// Re-expresses a `Should` or `Must` as a `Grouped` holding the same
    /// children with duplicates removed; any other variant is cloned as-is.
    fn lift(&self) -> Term<'src> {
        match self {
            Term::Should(ts) | Term::Must(ts) => {
                Term::Grouped(ts.iter().cloned().unique().collect())
            }
            _ => self.clone(),
        }
    }
}
impl fmt::Display for Query<'_> {
    /// Renders the top-level terms in order, separated by single spaces.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut rendered = String::new();
        for (position, term) in self.0.iter().enumerate() {
            // Separator goes between terms only, never before the first one.
            if position != 0 {
                rendered.push(' ');
            }
            rendered.push_str(&term.to_string());
        }
        f.write_str(&rendered)
    }
}
impl fmt::Display for Term<'_> {
    /// Renders the term in query syntax:
    ///
    /// * `Bare` as-is, `Wildcard` with a trailing `*`, `Literal` in single
    ///   quotes, `Phrase` in double quotes;
    /// * `Not` with a leading `!`;
    /// * `Should` / `Must` with a leading `~` / `+`, parenthesised unless
    ///   there is exactly one child;
    /// * `Near` / `Within` as `n/<slop>(...)` / `w/<slop>(...)`;
    /// * `Grouped` inside plain parentheses.
    ///
    /// Output goes straight to the formatter, so nested terms do not allocate
    /// an intermediate `String` per level.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        /// Writes `terms` separated by single spaces.
        fn write_list(f: &mut fmt::Formatter<'_>, terms: &[Term<'_>]) -> fmt::Result {
            for (position, term) in terms.iter().enumerate() {
                if position != 0 {
                    f.write_str(" ")?;
                }
                write!(f, "{term}")?;
            }
            Ok(())
        }

        /// Writes `op` followed by the terms; parentheses are omitted when
        /// there is exactly one term.
        fn write_boolean(f: &mut fmt::Formatter<'_>, op: char, terms: &[Term<'_>]) -> fmt::Result {
            if let [only] = terms {
                write!(f, "{op}{only}")
            } else {
                write!(f, "{op}(")?;
                write_list(f, terms)?;
                f.write_str(")")
            }
        }

        match self {
            Self::Bare(s) => f.write_str(s),
            Self::Wildcard(s) => write!(f, "{s}*"),
            Self::Literal(s) => write!(f, "'{s}'"),
            Self::Phrase(s) => write!(f, "\"{s}\""),
            Self::Not(q) => write!(f, "!{q}"),
            Self::Should(queries) => write_boolean(f, '~', queries),
            Self::Must(queries) => write_boolean(f, '+', queries),
            Self::Near(slop, queries) => {
                write!(f, "n/{slop}(")?;
                write_list(f, queries)?;
                f.write_str(")")
            }
            Self::Within(slop, queries) => {
                write!(f, "w/{slop}(")?;
                write_list(f, queries)?;
                f.write_str(")")
            }
            Self::Grouped(queries) => {
                f.write_str("(")?;
                write_list(f, queries)?;
                f.write_str(")")
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source`, panicking (at the caller's location) if parsing fails.
    #[track_caller]
    fn parse_ok(source: &str) -> Query<'_> {
        let mut remaining = source;
        Query::parse(&mut remaining).unwrap()
    }

    /// Parses `source` and renders the result without simplifying it.
    #[track_caller]
    fn rendered(source: &str) -> String {
        parse_ok(source).to_string()
    }

    /// Parses `source`, simplifies it once, and renders the result.
    #[track_caller]
    fn simplified(source: &str) -> String {
        parse_ok(source).simplify().to_string()
    }

    // ---- rendering of hand-built terms ----

    #[test]
    fn bare_to_string() {
        assert_eq!(Query(vec![Term::Bare("hello")]).to_string(), "hello");
    }

    #[test]
    fn wildcard_to_string() {
        assert_eq!(Query(vec![Term::Wildcard("hello")]).to_string(), "hello*");
    }

    #[test]
    fn phrase_to_string() {
        assert_eq!(
            Query(vec![Term::Phrase("hello world")]).to_string(),
            "\"hello world\""
        );
    }

    #[test]
    fn literal_to_string() {
        assert_eq!(
            Query(vec![Term::Literal("hello world")]).to_string(),
            "'hello world'"
        );
    }

    #[test]
    fn not_to_string() {
        let negated = Term::Not(Box::new(Term::Bare("pizza")));
        assert_eq!(Query(vec![negated]).to_string(), "!pizza");
    }

    #[test]
    fn not_phrase_to_string() {
        let negated = Term::Not(Box::new(Term::Phrase("pizza time")));
        assert_eq!(Query(vec![negated]).to_string(), "!\"pizza time\"");
    }

    #[test]
    fn nested_to_string() {
        // Both children of the outer `Should` are the same proximity clause.
        let near = Term::Near(
            20,
            vec![
                Term::Phrase("dogs and cats"),
                Term::Should(vec![Term::Phrase("rats"), Term::Phrase("mice")]),
            ],
        );
        let query = Query(vec![Term::Should(vec![near.clone(), near])]);
        assert_eq!(
            query.to_string(),
            r#"~(n/20("dogs and cats" ~("rats" "mice")) n/20("dogs and cats" ~("rats" "mice")))"#
        );
    }

    // ---- parse, then render ----

    #[test]
    fn multi_mixed_proximity_phrase_extraneous_parens() {
        assert_eq!(
            rendered(
                r#"~((n/20("dogs and cats" ~("rats" "mice"))) (n/20("dogs and cats" ~("rats" "mice"))))"#
            ),
            r#"~(n/20("dogs and cats" ~("rats" "mice")) n/20("dogs and cats" ~("rats" "mice")))"#
        );
    }

    #[test]
    fn bare_terms() {
        assert_eq!(rendered("elementary secondary"), "elementary secondary");
    }

    #[test]
    fn must_list() {
        assert_eq!(rendered("+elementary +secondary"), "+elementary +secondary");
    }

    #[test]
    fn must_group() {
        assert_eq!(
            rendered("+(elementary secondary)"),
            "+(elementary secondary)"
        );
    }

    // ---- parse, simplify, then render ----

    #[test]
    fn simplify_single_top_level_must() {
        assert_eq!(simplified("+elementary"), "elementary");
    }

    #[test]
    fn simplify_top_level_must_list() {
        assert_eq!(simplified("+elementary +secondary"), "elementary secondary");
    }

    #[test]
    fn simplify_top_level_must_group() {
        // Simplified twice, exactly as the original test did.
        let once = parse_ok("+(elementary secondary)").simplify();
        assert_eq!(once.simplify().to_string(), "elementary secondary");
    }

    #[test]
    fn simplify_top_level_should_list() {
        assert_eq!(
            simplified("~elementary ~secondary"),
            "~(elementary secondary)"
        );
    }

    #[test]
    fn remove_redundant_term() {
        assert_eq!(simplified("a b a"), "a b");
    }

    #[test]
    fn simplify_redundant_term_with_should() {
        assert_eq!(simplified("a ~b a"), "a ~b");
    }

    #[test]
    fn simplify_redundant_term_with_leading_should() {
        assert_eq!(simplified("~b a a"), "~b a");
    }

    #[test]
    fn simplify_removes_redundant_shoulds() {
        assert_eq!(simplified("~b a ~b"), "~b a");
    }

    #[test]
    fn simplify_removes_and_simplifies_redundant_musts() {
        assert_eq!(simplified("+b a +b"), "b a");
    }

    #[test]
    fn simplify_removes_extraneous_should() {
        assert_eq!(simplified("~a"), "a");
    }

    #[test]
    fn should_with_nested_parens_to_string() {
        assert_eq!(rendered("~((dogs cats))"), "~(dogs cats)");
    }

    #[test]
    fn simplify_should_with_nested_parens_to_string() {
        assert_eq!(simplified("~((dogs cats))"), "~(dogs cats)");
    }

    #[test]
    fn should_with_multi_nested_parens_to_string() {
        assert_eq!(
            rendered("~((dogs cats) (rats mice))"),
            "~(dogs cats rats mice)"
        );
    }

    #[test]
    fn simplify_should_with_multi_nested_parens_to_string() {
        assert_eq!(
            simplified("~((dogs cats) (rats mice))"),
            "~(dogs cats rats mice)"
        );
    }

    #[test]
    fn simplify_removes_extraneous_should_with_parens() {
        assert_eq!(simplified("~(a)"), "a");
    }

    #[test]
    fn simplify_removes_extraneous_parens_with_should() {
        assert_eq!(simplified("(~a)"), "a");
    }

    #[test]
    fn simplify_extraneous_parens_with_should() {
        // Compares the tree itself rather than its rendering.
        assert_eq!(
            parse_ok("(~a)").simplify(),
            Query(vec![Term::Bare("a")])
        );
    }
}