use crate::parser::query;
use itertools::Itertools;
use std::fmt;
use winnow::{Parser, Result};
/// A search query: an ordered list of top-level [`Term`]s.
///
/// The terms borrow their text from a source string that lives for `'src`.
/// The `Display` implementation renders the terms in order, separated by
/// single spaces.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Query<'src>(pub Vec<Term<'src>>);
impl<'src> TryFrom<&'src str> for Query<'src> {
    type Error = anyhow::Error;

    /// Builds a [`Query`] by running the crate's `query` parser over `input`.
    ///
    /// # Errors
    ///
    /// When the parser rejects `input`, its error is rendered to text with
    /// `Display` and that text becomes the message of the returned
    /// [`anyhow::Error`].
    fn try_from(input: &'src str) -> std::result::Result<Self, Self::Error> {
        match query.parse(input) {
            Ok(parsed) => Ok(parsed),
            // Only the rendered message is kept, not the parser's error value.
            Err(e) => Err(anyhow::format_err!("{e}")),
        }
    }
}
/// One node of a [`Query`] tree.
///
/// Leaf variants borrow their text from the query source (`'src`); the other
/// variants wrap child terms. The textual form noted on each variant is the one
/// produced by this type's `Display` implementation.
// NOTE(review): the search semantics hinted at by the variant names (optional,
// required, negation, proximity) are not established in this file — confirm
// against the parser and the query consumer.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum Term<'src> {
/// Plain text, rendered verbatim.
Bare(&'src str),
/// Text rendered with a trailing `*`; the stored text does not include the `*`.
Wildcard(&'src str),
/// Text rendered between double quotes; the stored text excludes the quotes.
Phrase(&'src str),
/// Text rendered between single quotes; the stored text excludes the quotes.
Literal(&'src str),
/// A single child term, rendered with a leading `!`.
Not(Box<Term<'src>>),
/// Child terms rendered with a leading `~`: `~x` for exactly one child,
/// otherwise `~(x y ...)`.
Should(Vec<Term<'src>>),
/// Child terms rendered with a leading `+`: `+x` for exactly one child,
/// otherwise `+(x y ...)`.
Must(Vec<Term<'src>>),
/// A number and child terms, rendered as `n/<number>(x y ...)`.
/// Presumably the number is a maximum distance between the children — TODO confirm.
Near(u8, Vec<Term<'src>>),
/// A number and child terms, rendered as `w/<number>(x y ...)`.
/// Presumably the number is a maximum distance between the children — TODO confirm.
Within(u8, Vec<Term<'src>>),
/// Child terms rendered inside parentheses: `(x y ...)`.
Grouped(Vec<Term<'src>>),
}
impl<'src> Query<'src> {
    /// Parses `input` into a [`Query`].
    ///
    /// This is a convenience wrapper around the `TryFrom<&str>` implementation.
    ///
    /// # Errors
    ///
    /// Returns the error produced by that conversion when `input` is rejected.
    pub fn parse(input: &'src str) -> Result<Query<'src>, anyhow::Error> {
        Self::try_from(input)
    }

    /// Returns a simplified copy of this query; `self` is left untouched.
    ///
    /// The steps, in order:
    ///
    /// 1. A query made of a single `Should` or `Grouped` term that wraps exactly
    ///    one child is replaced by that child, and the result is simplified
    ///    again.
    /// 2. The children of every top-level `Should` term are pooled, each is
    ///    passed through `Term::simplify`, and duplicates are dropped (the first
    ///    occurrence is kept).
    /// 3. Every other top-level term is passed through `Term::simplify` and
    ///    de-duplicated the same way.
    /// 4. If the pool from step 2 is non-empty, it is appended as one trailing
    ///    `Should` term and the query is returned.
    /// 5. Otherwise, a lone `Grouped` result is unwrapped (its children are
    ///    de-duplicated and simplified as a query of their own), a lone `Should`
    ///    result is simplified again, and anything else is returned as is.
    #[must_use]
    pub fn simplify(&self) -> Query<'src> {
        // A lone wrapper around a single term adds nothing. Both wrapper kinds
        // recurse so that the unwrapped term is itself simplified.
        if let [Term::Should(inner) | Term::Grouped(inner)] = self.0.as_slice() {
            if inner.len() == 1 {
                return Query(inner.clone()).simplify();
            }
        }

        // Split the top level in one pass, cloning each term exactly once:
        // children of `Should` terms are pooled, everything else is kept as is.
        let mut pooled: Vec<Term<'src>> = Vec::new();
        let mut others: Vec<Term<'src>> = Vec::new();
        for term in &self.0 {
            match term {
                Term::Should(children) => pooled.extend(children.iter().cloned()),
                other => others.push(other.clone()),
            }
        }

        let pooled: Vec<Term<'src>> = pooled.into_iter().map(Term::simplify).unique().collect();
        // `Term::simplify` is idempotent, so one pass over the remaining terms
        // is enough.
        let mut simplified: Vec<Term<'src>> =
            others.into_iter().map(Term::simplify).unique().collect();

        if !pooled.is_empty() {
            simplified.push(Term::Should(pooled));
            return Query(simplified);
        }

        match simplified.as_slice() {
            [Term::Grouped(children)] => {
                Query(children.iter().cloned().unique().collect()).simplify()
            }
            // Simplifying a term can surface a `Should` (e.g. from a `Must`
            // wrapping one); run it through the query-level rules as well.
            [Term::Should(_)] => Query(simplified).simplify(),
            _ => Query(simplified),
        }
    }
}
impl<'src> Term<'src> {
fn simplify(self) -> Term<'src> {
match self {
Term::Must(ts) | Term::Grouped(ts) => {
if ts.len() == 1 {
ts[0].clone().simplify()
} else {
Term::Grouped(ts.into_iter().map(Term::simplify).collect())
}
}
_ => self.clone(),
}
}
}
impl fmt::Display for Query<'_> {
    /// Writes the top-level terms in order, separated by single spaces.
    ///
    /// An empty query renders as the empty string.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Write straight into the formatter; no per-term `String` and no
        // intermediate `Vec` are needed.
        let mut terms = self.0.iter();
        if let Some(first) = terms.next() {
            write!(f, "{first}")?;
            for term in terms {
                write!(f, " {term}")?;
            }
        }
        Ok(())
    }
}
impl fmt::Display for Term<'_> {
    /// Writes the textual form of this term.
    ///
    /// | Variant    | Output                                      |
    /// |------------|---------------------------------------------|
    /// | `Bare`     | the text as is                              |
    /// | `Wildcard` | the text followed by `*`                    |
    /// | `Literal`  | the text in single quotes                   |
    /// | `Phrase`   | the text in double quotes                   |
    /// | `Not`      | `!` followed by the child                   |
    /// | `Should`   | `~x` for one child, otherwise `~(x y ...)`  |
    /// | `Must`     | `+x` for one child, otherwise `+(x y ...)`  |
    /// | `Near`     | `n/<number>(x y ...)`                       |
    /// | `Within`   | `w/<number>(x y ...)`                       |
    /// | `Grouped`  | `(x y ...)`                                 |
    ///
    /// Children are always separated by single spaces. Text is written
    /// verbatim; no quoting or escaping is applied to it.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        /// Writes `terms` separated by single spaces.
        fn write_list(f: &mut fmt::Formatter, terms: &[Term]) -> fmt::Result {
            for (index, term) in terms.iter().enumerate() {
                if index > 0 {
                    f.write_str(" ")?;
                }
                write!(f, "{term}")?;
            }
            Ok(())
        }

        /// Writes `terms` as a space-separated list wrapped in parentheses.
        fn write_parenthesized(f: &mut fmt::Formatter, terms: &[Term]) -> fmt::Result {
            f.write_str("(")?;
            write_list(f, terms)?;
            f.write_str(")")
        }

        /// Writes `op` followed by the single child, or by a parenthesized
        /// list when there is any other number of children.
        fn write_boolean(f: &mut fmt::Formatter, op: char, terms: &[Term]) -> fmt::Result {
            write!(f, "{op}")?;
            match terms {
                [only] => write!(f, "{only}"),
                _ => write_parenthesized(f, terms),
            }
        }

        match self {
            Self::Bare(s) => f.write_str(s),
            Self::Wildcard(s) => write!(f, "{s}*"),
            Self::Literal(s) => write!(f, "'{s}'"),
            Self::Phrase(s) => write!(f, "\"{s}\""),
            Self::Not(q) => write!(f, "!{q}"),
            Self::Should(queries) => write_boolean(f, '~', queries),
            Self::Must(queries) => write_boolean(f, '+', queries),
            Self::Near(slop, queries) => {
                write!(f, "n/{slop}")?;
                write_parenthesized(f, queries)
            }
            Self::Within(slop, queries) => {
                write!(f, "w/{slop}")?;
                write_parenthesized(f, queries)
            }
            Self::Grouped(queries) => write_parenthesized(f, queries),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input`, failing the calling test when the parser rejects it.
    fn parsed(input: &str) -> Query<'_> {
        Query::parse(input).unwrap()
    }

    /// Parses `input` and renders it back to text without simplifying.
    fn rendered(input: &str) -> String {
        parsed(input).to_string()
    }

    /// Parses `input`, simplifies it once, and renders the result to text.
    fn simplified(input: &str) -> String {
        parsed(input).simplify().to_string()
    }

    // --- Rendering hand-built queries -------------------------------------

    #[test]
    fn bare_to_string() {
        let q = Query(vec![Term::Bare("hello")]);
        assert_eq!(q.to_string(), "hello");
    }

    #[test]
    fn wildcard_to_string() {
        let q = Query(vec![Term::Wildcard("hello")]);
        assert_eq!(q.to_string(), "hello*");
    }

    #[test]
    fn phrase_to_string() {
        let q = Query(vec![Term::Phrase("hello world")]);
        assert_eq!(q.to_string(), "\"hello world\"");
    }

    #[test]
    fn literal_to_string() {
        let q = Query(vec![Term::Literal("hello world")]);
        assert_eq!(q.to_string(), "'hello world'");
    }

    #[test]
    fn not_to_string() {
        let q = Query(vec![Term::Not(Box::new(Term::Bare("pizza")))]);
        assert_eq!(q.to_string(), "!pizza");
    }

    #[test]
    fn not_phrase_to_string() {
        let q = Query(vec![Term::Not(Box::new(Term::Phrase("pizza time")))]);
        assert_eq!(q.to_string(), "!\"pizza time\"");
    }

    #[test]
    fn nested_to_string() {
        let near = Term::Near(
            20,
            vec![
                Term::Phrase("dogs and cats"),
                Term::Should(vec![Term::Phrase("rats"), Term::Phrase("mice")]),
            ],
        );
        let query = Query(vec![Term::Should(vec![near.clone(), near])]);
        let expected =
            r#"~(n/20("dogs and cats" ~("rats" "mice")) n/20("dogs and cats" ~("rats" "mice")))"#;
        assert_eq!(query.to_string(), expected);
    }

    // --- Parse, then render -----------------------------------------------

    #[test]
    fn multi_mixed_proximity_phrase_extraneous_parens() {
        let input = r#"~((n/20("dogs and cats" ~("rats" "mice"))) (n/20("dogs and cats" ~("rats" "mice"))))"#;
        let expected =
            r#"~(n/20("dogs and cats" ~("rats" "mice")) n/20("dogs and cats" ~("rats" "mice")))"#;
        assert_eq!(rendered(input), expected);
    }

    #[test]
    fn bare_terms() {
        assert_eq!(rendered("elementary secondary"), "elementary secondary");
    }

    #[test]
    fn must_list() {
        assert_eq!(rendered("+elementary +secondary"), "+elementary +secondary");
    }

    #[test]
    fn must_group() {
        assert_eq!(rendered("+(elementary secondary)"), "+(elementary secondary)");
    }

    #[test]
    fn should_with_nested_parens_to_string() {
        assert_eq!(rendered("~((dogs cats))"), "~(dogs cats)");
    }

    #[test]
    fn should_with_multi_nested_parens_to_string() {
        assert_eq!(rendered("~((dogs cats) (rats mice))"), "~(dogs cats rats mice)");
    }

    // --- Parse, simplify, then render -------------------------------------

    #[test]
    fn simplify_single_top_level_must() {
        assert_eq!(simplified("+elementary"), "elementary");
    }

    #[test]
    fn simplify_top_level_must_list() {
        assert_eq!(simplified("+elementary +secondary"), "elementary secondary");
    }

    #[test]
    fn simplify_top_level_must_group() {
        // The query is simplified twice here, so this also checks that
        // simplifying an already simplified query leaves it unchanged.
        let once = parsed("+(elementary secondary)").simplify();
        assert_eq!(once.simplify().to_string(), "elementary secondary");
    }

    #[test]
    fn simplify_top_level_should_list() {
        assert_eq!(simplified("~elementary ~secondary"), "~(elementary secondary)");
    }

    #[test]
    fn remove_redundant_term() {
        assert_eq!(simplified("a b a"), "a b");
    }

    #[test]
    fn simplify_redundant_term_with_should() {
        assert_eq!(simplified("a ~b a"), "a ~b");
    }

    #[test]
    fn simplify_redundant_term_with_leading_should() {
        assert_eq!(simplified("~b a a"), "a ~b");
    }

    #[test]
    fn simplify_removes_redundant_shoulds() {
        assert_eq!(simplified("~b a ~b"), "a ~b");
    }

    #[test]
    fn simplify_removes_and_simplifies_redundant_musts() {
        assert_eq!(simplified("+b a +b"), "b a");
    }

    #[test]
    fn simplify_removes_extraneous_should() {
        assert_eq!(simplified("~a"), "a");
    }

    #[test]
    fn simplify_should_with_nested_parens_to_string() {
        assert_eq!(simplified("~((dogs cats))"), "~(dogs cats)");
    }

    #[test]
    fn simplify_should_with_multi_nested_parens_to_string() {
        assert_eq!(simplified("~((dogs cats) (rats mice))"), "~(dogs cats rats mice)");
    }

    #[test]
    fn simplify_removes_extraneous_should_with_parens() {
        assert_eq!(simplified("~(a)"), "a");
    }

    #[test]
    fn simplify_removes_extraneous_parens_with_should() {
        assert_eq!(simplified("(~a)"), "a");
    }

    #[test]
    fn simplify_extraneous_parens_with_should() {
        // Compares the simplified tree itself rather than its rendered text.
        let result = parsed("(~a)").simplify();
        assert_eq!(result, Query(vec![Term::Bare("a")]));
    }

    #[test]
    fn simplify_groups_shoulds() {
        assert_eq!(simplified("~a b ~c"), "b ~(a c)");
    }

    #[test]
    fn simplify_groups_shoulds_multiple_musts() {
        assert_eq!(simplified("~a +b ~c +d"), "b d ~(a c)");
    }

    #[test]
    fn simplify_multiple_should_groups() {
        assert_eq!(simplified("~(dogs cats) ~(rats mice)"), "~(dogs cats rats mice)");
    }

    #[test]
    fn simplify_multiple_should_groups_with_other_terms() {
        assert_eq!(
            simplified("~(dogs cats) ~(rats mice) other stuff"),
            "other stuff ~(dogs cats rats mice)"
        );
    }

    // --- Error reporting ---------------------------------------------------

    #[test]
    fn query_try_from_bad_wildcard_err() {
        let result = Query::try_from("*izza").unwrap_err();
        // The continuation lines stay at column 0 so the literal's content is
        // not affected by source indentation.
        let error = "\
*izza
^
";
        assert_eq!(result.to_string(), error);
    }
}