use crate::query::{Query, Term};
use winnow::ascii::{alphanumeric1, digit1, multispace0, multispace1};
use winnow::combinator::{alt, delimited, not, preceded, separated, terminated};
use winnow::token::take_while;
use winnow::{Parser, Result};
pub(crate) fn query<'src>(input: &mut &'src str) -> Result<Query<'src>> {
terms.map(Query).parse_next(input)
}
/// Parses one or more top-level terms separated by at least one whitespace
/// character.
fn terms<'src>(input: &mut &'src str) -> Result<Vec<Term<'src>>> {
    let items: Vec<Term<'src>> = separated(1.., top, multispace1).parse_next(input)?;
    Ok(items)
}
/// Parses a single top-level term.
///
/// The alternatives are attempted in this order: parenthesised group,
/// proximity expression, boolean (`+` / `~`) expression, plain term, and
/// finally a `!` negation.
fn top<'src>(input: &mut &'src str) -> Result<Term<'src>> {
    let mut any_top_level = alt((paren_top, proximity, boolean, term, not_));
    any_top_level.parse_next(input)
}
/// Parses a parenthesised list of top-level terms into [`Term::Grouped`].
///
/// Optional whitespace is accepted directly after `(` and directly before `)`.
fn paren_top<'src>(input: &mut &'src str) -> Result<Term<'src>> {
    let opening = ("(", multispace0);
    let closing = (multispace0, ")");
    let members = delimited(opening, terms, closing).parse_next(input)?;
    Ok(Term::Grouped(members))
}
fn proximity<'src>(input: &mut &'src str) -> Result<Term<'src>> {
let (prox_type, _, slop, args) = (
alt(('w', 'W', 'n', 'N')),
'/',
digit1.parse_to(),
alt((prox_args, paren_prox_args)),
)
.parse_next(input)?;
if prox_type == 'w' || prox_type == 'W' {
Ok(Term::Within(slop, args))
} else {
Ok(Term::Near(slop, args))
}
}
/// Parses a boolean expression introduced by `+` or `~`.
///
/// `+` yields [`Term::Must`] and `~` yields [`Term::Should`]. A single
/// argument is tried first and wrapped in a one-element vector; otherwise an
/// argument list is parsed via [`bool_args`].
fn boolean<'src>(input: &mut &'src str) -> Result<Term<'src>> {
    let single_arg = bool_arg.map(|only| vec![only]);
    let (sigil, args) = (alt(('+', '~')), alt((single_arg, bool_args))).parse_next(input)?;

    Ok(match sigil {
        '+' => Term::Must(args),
        // The sigil parser only admits `+` and `~`.
        _ => Term::Should(args),
    })
}
/// Parses one argument of a boolean expression: a proximity expression or,
/// failing that, a plain term.
fn bool_arg<'src>(input: &mut &'src str) -> Result<Term<'src>> {
    let mut argument = alt((proximity, term));
    argument.parse_next(input)
}
/// Parses the argument list of a boolean expression.
///
/// Parenthesised groups are tried first; otherwise one or more
/// whitespace-separated [`bool_arg`]s are accepted.
fn bool_args<'src>(input: &mut &'src str) -> Result<Vec<Term<'src>>> {
    let unwrapped_list = separated(1.., bool_arg, multispace1);
    alt((paren_bool_args, unwrapped_list)).parse_next(input)
}
/// Parses one or more whitespace-separated parenthesised groups of boolean
/// arguments and flattens them into a single list.
///
/// Each group may contain optional whitespace after `(` and before `)`.
/// Because the group body is [`bool_args`], groups may nest.
fn paren_bool_args<'src>(input: &mut &'src str) -> Result<Vec<Term<'src>>> {
    let group = delimited(("(", multispace0), bool_args, (multispace0, ")"));
    let groups: Vec<Vec<Term<'src>>> = separated(1.., group, multispace1).parse_next(input)?;
    Ok(groups.into_iter().flatten().collect())
}
/// Parses the arguments of a proximity expression.
///
/// Two shapes are tried in order: a list made only of literals, or a list of
/// proximity expressions, boolean expressions and non-literal terms. Each
/// list holds one or more whitespace-separated items.
fn prox_args<'src>(input: &mut &'src str) -> Result<Vec<Term<'src>>> {
    let literals_only = separated(1.., literal, multispace1);
    let non_literals = separated(1.., alt((proximity, boolean, nonliteral)), multispace1);
    alt((literals_only, non_literals)).parse_next(input)
}
/// Parses proximity arguments wrapped in parentheses, allowing optional
/// whitespace after `(` and before `)`.
fn paren_prox_args<'src>(input: &mut &'src str) -> Result<Vec<Term<'src>>> {
    let opening = ("(", multispace0);
    let closing = (multispace0, ")");
    delimited(opening, prox_args, closing).parse_next(input)
}
/// Parses a negation: `!` immediately followed by a plain term, producing
/// [`Term::Not`] around the boxed term.
fn not_<'src>(input: &mut &'src str) -> Result<Term<'src>> {
    let negated = preceded('!', term).parse_next(input)?;
    Ok(Term::Not(Box::new(negated)))
}
/// Parses a plain term: a non-literal (wildcard, phrase or bare word) or,
/// failing that, a quoted literal.
fn term<'src>(input: &mut &'src str) -> Result<Term<'src>> {
    let mut plain = alt((nonliteral, literal));
    plain.parse_next(input)
}
/// Parses a non-literal term, trying wildcard, then phrase, then bare word.
///
/// The wildcard is attempted before the bare word, so a word followed by `*`
/// is not consumed as a bare word.
fn nonliteral<'src>(input: &mut &'src str) -> Result<Term<'src>> {
    let mut candidate = alt((wildcard, phrase, bare));
    candidate.parse_next(input)
}
/// Parses a double-quoted phrase into [`Term::Phrase`].
///
/// The opening quote may be `"` or `“` and the closing quote `"` or `”`.
/// The two are matched independently of each other.
fn phrase<'src>(input: &mut &'src str) -> Result<Term<'src>> {
    let opening = alt(('"', '“'));
    let closing = alt(('"', '”'));
    delimited(opening, span, closing)
        .map(Term::Phrase)
        .parse_next(input)
}
/// Parses a single-quoted literal into [`Term::Literal`].
///
/// The opening quote may be `'` or `‘` and the closing quote `'` or `’`.
/// The two are matched independently of each other.
fn literal<'src>(input: &mut &'src str) -> Result<Term<'src>> {
    let opening = alt(("'", "‘"));
    let closing = alt(("'", "’"));
    delimited(opening, span, closing)
        .map(Term::Literal)
        .parse_next(input)
}
/// Parses the body of a quoted phrase or literal: one or more characters that
/// are alphanumeric, whitespace, or one of the permitted punctuation marks.
fn span<'src>(input: &mut &'src str) -> Result<&'src str> {
    /// Punctuation allowed inside quoted text.
    const PUNCTUATION: &str = "$%.,-";

    let is_span_char =
        |c: char| c.is_alphanumeric() || c.is_whitespace() || PUNCTUATION.contains(c);
    take_while(1.., is_span_char).parse_next(input)
}
/// Parses a word immediately followed by `*` into [`Term::Wildcard`]; the
/// `*` itself is not part of the captured text.
fn wildcard<'src>(input: &mut &'src str) -> Result<Term<'src>> {
    let stem = terminated(string, '*').parse_next(input)?;
    Ok(Term::Wildcard(stem))
}
/// Parses an unquoted word into [`Term::Bare`].
fn bare<'src>(input: &mut &'src str) -> Result<Term<'src>> {
    let word = string.parse_next(input)?;
    Ok(Term::Bare(word))
}
fn string<'src>(input: &mut &'src str) -> Result<&'src str> {
(alphanumeric1, not("/")).parse_next(input).map(|(s, ())| s)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses the input with `Query::parse`, unwraps the result, and asserts
    /// that it equals the expected query.
    macro_rules! assert_parses_to {
        ($src:expr, $expected:expr $(,)?) => {{
            let mut input = $src;
            let result = Query::parse(&mut input).unwrap();
            let expected = $expected;
            assert_eq!(result, expected);
        }};
    }

    /// Asserts that `Query::parse` returns an error for the input.
    macro_rules! assert_rejects {
        ($src:expr) => {{
            let mut input = $src;
            let result = Query::parse(&mut input);
            assert!(result.is_err());
        }};
    }

    /// Asserts that `Query::parse` succeeds for the input, without inspecting
    /// the parsed value.
    macro_rules! assert_accepts {
        ($src:expr) => {{
            let mut input = $src;
            let result = Query::parse(&mut input);
            assert!(result.is_ok());
        }};
    }

    /// Parses the input, renders the query with `to_string`, and asserts the
    /// rendered text equals the expected string.
    macro_rules! assert_renders_as {
        ($src:expr, $expected:expr $(,)?) => {{
            let mut input = $src;
            let result = Query::parse(&mut input).unwrap().to_string();
            let expected = $expected;
            assert_eq!(result, expected);
        }};
    }

    #[test]
    fn bare() {
        assert_parses_to!("pizza", Query(vec![Term::Bare("pizza")]));
    }

    #[test]
    fn wildcard() {
        assert_parses_to!("pizz*", Query(vec![Term::Wildcard("pizz")]));
    }

    #[test]
    fn should_group() {
        assert_parses_to!(
            "~(pizza pasta)",
            Query(vec![Term::Should(vec![
                Term::Bare("pizza"),
                Term::Bare("pasta"),
            ])]),
        );
    }

    #[test]
    fn must_group() {
        assert_parses_to!(
            "+(pizza pasta)",
            Query(vec![Term::Must(vec![
                Term::Bare("pizza"),
                Term::Bare("pasta"),
            ])]),
        );
    }

    #[test]
    fn paren() {
        assert_parses_to!(
            "(pizza pasta)",
            Query(vec![Term::Grouped(vec![
                Term::Bare("pizza"),
                Term::Bare("pasta"),
            ])]),
        );
    }

    #[test]
    fn simple() {
        assert_parses_to!(
            "pizza pasta",
            Query(vec![Term::Bare("pizza"), Term::Bare("pasta")]),
        );
    }

    #[test]
    fn single_should() {
        assert_parses_to!(
            "~pizza",
            Query(vec![Term::Should(vec![Term::Bare("pizza")])]),
        );
    }

    #[test]
    fn multi_should() {
        assert_parses_to!(
            "~pizza ~pasta",
            Query(vec![
                Term::Should(vec![Term::Bare("pizza")]),
                Term::Should(vec![Term::Bare("pasta")]),
            ]),
        );
    }

    #[test]
    fn mixed_prox_fails() {
        assert_rejects!("n/20('dogs and cats' rats)");
    }

    #[test]
    fn literal_prox_succeeds() {
        assert_accepts!("n/20('dogs and cats' 'rats and mice')");
    }

    #[test]
    fn single_must() {
        assert_parses_to!("+pizza", Query(vec![Term::Must(vec![Term::Bare("pizza")])]));
    }

    #[test]
    fn multi_must() {
        assert_parses_to!(
            "+pizza +pasta",
            Query(vec![
                Term::Must(vec![Term::Bare("pizza")]),
                Term::Must(vec![Term::Bare("pasta")]),
            ]),
        );
    }

    #[test]
    fn single_not() {
        assert_parses_to!(
            "!pizza",
            Query(vec![Term::Not(Box::new(Term::Bare("pizza")))]),
        );
    }

    #[test]
    fn multi_not() {
        assert_parses_to!(
            "!pizza !pasta",
            Query(vec![
                Term::Not(Box::new(Term::Bare("pizza"))),
                Term::Not(Box::new(Term::Bare("pasta"))),
            ]),
        );
    }

    #[test]
    fn phrase() {
        assert_parses_to!(
            r#""investment incentive""#,
            Query(vec![Term::Phrase("investment incentive")]),
        );
    }

    #[test]
    fn literal() {
        assert_parses_to!(
            "'investment incentive'",
            Query(vec![Term::Literal("investment incentive")]),
        );
    }

    #[test]
    fn mixed_boolean() {
        assert_parses_to!(
            "+pizza ~pasta !mushroom",
            Query(vec![
                Term::Must(vec![Term::Bare("pizza")]),
                Term::Should(vec![Term::Bare("pasta")]),
                Term::Not(Box::new(Term::Bare("mushroom"))),
            ]),
        );
    }

    #[test]
    fn near() {
        assert_parses_to!(
            "n/25(pesticide insect)",
            Query(vec![Term::Near(
                25,
                vec![Term::Bare("pesticide"), Term::Bare("insect")],
            )]),
        );
    }

    #[test]
    fn mixed_proximity_boolean_wildcard() {
        assert_parses_to!(
            "N/10(~(pizz* past*) ~(tomat* mozzarell* arancin* crust))",
            Query(vec![Term::Near(
                10,
                vec![
                    Term::Should(vec![Term::Wildcard("pizz"), Term::Wildcard("past")]),
                    Term::Should(vec![
                        Term::Wildcard("tomat"),
                        Term::Wildcard("mozzarell"),
                        Term::Wildcard("arancin"),
                        Term::Bare("crust"),
                    ]),
                ],
            )]),
        );
    }

    #[test]
    fn mixed_proximity_phrase() {
        // Both halves of the query are the same proximity expression, so the
        // expected term is built by one closure called twice.
        let near = || {
            Term::Near(
                20,
                vec![
                    Term::Phrase("dogs and cats"),
                    Term::Should(vec![Term::Phrase("rats"), Term::Phrase("mice")]),
                ],
            )
        };
        assert_parses_to!(
            r#"~(n/20("dogs and cats" ~("rats" "mice")) n/20("dogs and cats" ~("rats" "mice")))"#,
            Query(vec![Term::Should(vec![near(), near()])]),
        );
    }

    #[test]
    fn mixed_proximity_phrase_extraneous_parens() {
        assert_parses_to!(
            r#"~((n/20("dogs and cats" ~("rats" "mice"))))"#,
            Query(vec![Term::Should(vec![Term::Near(
                20,
                vec![
                    Term::Phrase("dogs and cats"),
                    Term::Should(vec![Term::Phrase("rats"), Term::Phrase("mice")]),
                ],
            )])]),
        );
    }

    #[test]
    fn multi_mixed_proximity_phrase_extraneous_parens() {
        // Both parenthesised groups hold the same proximity expression.
        let near = || {
            Term::Near(
                20,
                vec![
                    Term::Phrase("dogs and cats"),
                    Term::Should(vec![Term::Phrase("rats"), Term::Phrase("mice")]),
                ],
            )
        };
        assert_parses_to!(
            r#"~((n/20("dogs and cats" ~("rats" "mice"))) (n/20("dogs and cats" ~("rats" "mice"))))"#,
            Query(vec![Term::Should(vec![near(), near()])]),
        );
    }

    #[test]
    fn nested_parens() {
        assert_parses_to!(
            "((dogs cats))",
            Query(vec![Term::Grouped(vec![Term::Grouped(vec![
                Term::Bare("dogs"),
                Term::Bare("cats"),
            ])])]),
        );
    }

    #[test]
    fn terms_with_group() {
        assert_parses_to!(
            "rats mice (dogs cats)",
            Query(vec![
                Term::Bare("rats"),
                Term::Bare("mice"),
                Term::Grouped(vec![Term::Bare("dogs"), Term::Bare("cats")]),
            ]),
        );
    }

    #[test]
    fn group_with_terms() {
        assert_parses_to!(
            "(dogs cats) rats mice",
            Query(vec![
                Term::Grouped(vec![Term::Bare("dogs"), Term::Bare("cats")]),
                Term::Bare("rats"),
                Term::Bare("mice"),
            ]),
        );
    }

    #[test]
    fn list_of_groups() {
        assert_parses_to!(
            "(dogs cats) (rats mice)",
            Query(vec![
                Term::Grouped(vec![Term::Bare("dogs"), Term::Bare("cats")]),
                Term::Grouped(vec![Term::Bare("rats"), Term::Bare("mice")]),
            ]),
        );
    }

    #[test]
    fn group_of_groups() {
        assert_parses_to!(
            "((dogs cats) (rats mice))",
            Query(vec![Term::Grouped(vec![
                Term::Grouped(vec![Term::Bare("dogs"), Term::Bare("cats")]),
                Term::Grouped(vec![Term::Bare("rats"), Term::Bare("mice")]),
            ])]),
        );
    }

    #[test]
    fn boolean_nested_parens() {
        assert_parses_to!(
            "~((dogs cats))",
            Query(vec![Term::Should(vec![
                Term::Bare("dogs"),
                Term::Bare("cats"),
            ])]),
        );
    }

    #[test]
    fn multi_top_level_proximity() {
        assert_parses_to!(
            "n/5(a b) w/6(c d)",
            Query(vec![
                Term::Near(5, vec![Term::Bare("a"), Term::Bare("b")]),
                Term::Within(6, vec![Term::Bare("c"), Term::Bare("d")]),
            ]),
        );
    }

    #[test]
    fn nested_boolean() {
        assert_rejects!("~(+a ~b)");
    }

    #[test]
    fn proximity_unclosed_paren_error() {
        assert_rejects!("n/30((~pipes ~paint) lead");
    }

    #[test]
    fn should_unclosed_paren_error() {
        assert_rejects!("~(pipes paint");
    }

    #[test]
    fn invalid_proximity() {
        assert_rejects!("n/30('pipes' paint)");
    }

    #[test]
    fn redundant_term_with_should() {
        assert_parses_to!(
            "a ~b a",
            Query(vec![
                Term::Bare("a"),
                Term::Should(vec![Term::Bare("b")]),
                Term::Bare("a"),
            ]),
        );
    }

    #[test]
    fn leading_should() {
        assert_parses_to!(
            "~b a",
            Query(vec![Term::Should(vec![Term::Bare("b")]), Term::Bare("a")]),
        );
    }

    #[test]
    fn extraneous_parens_with_should() {
        assert_parses_to!(
            "(~a)",
            Query(vec![Term::Grouped(vec![Term::Should(vec![Term::Bare(
                "a",
            )])])]),
        );
    }

    #[test]
    fn double_negative_is_err() {
        assert_rejects!("!!pizza");
    }

    #[test]
    fn negated_group_is_err() {
        assert_rejects!("!(pizza)");
    }

    #[test]
    fn wildcard_prefix_is_err() {
        assert_rejects!("*izza");
    }

    #[test]
    fn literal_queries_with_punctuation() {
        assert_accepts!("'50% of the total amount appropriated'");
        assert_accepts!("'42 U.S.C. 651'");
        assert_accepts!("'availability, access, and utilization'");
        assert_accepts!("'audio-only'");
    }

    #[test]
    fn phrase_with_smart_quotes() {
        assert_renders_as!("“investment incentives”", "\"investment incentives\"");
    }

    #[test]
    fn literal_with_smart_quotes() {
        assert_renders_as!(
            "‘50% of the total amount appropriated’",
            "'50% of the total amount appropriated'",
        );
    }

    #[test]
    fn trailing_space_in_boolean_groups() {
        assert_accepts!("~(pregan* matern* )");
    }

    #[test]
    fn leading_and_trailing_space_in_boolean_groups() {
        assert_accepts!("~( pregan* matern* )");
    }

    #[test]
    fn multi_top_level_proximity_with_spaces() {
        assert_parses_to!(
            "n/5( a b ) w/6( c d )",
            Query(vec![
                Term::Near(5, vec![Term::Bare("a"), Term::Bare("b")]),
                Term::Within(6, vec![Term::Bare("c"), Term::Bare("d")]),
            ]),
        );
    }

    #[test]
    fn group_of_groups_extra_spaces() {
        assert_parses_to!(
            "( ( dogs cats ) ( rats mice ) )",
            Query(vec![Term::Grouped(vec![
                Term::Grouped(vec![Term::Bare("dogs"), Term::Bare("cats")]),
                Term::Grouped(vec![Term::Bare("rats"), Term::Bare("mice")]),
            ])]),
        );
    }
}