use crate::query::{Query, Term};
use winnow::ascii::{digit1, multispace0, multispace1};
use winnow::combinator::{alt, cut_err, delimited, not, preceded, separated, terminated};
use winnow::error::{StrContext, StrContextValue};
use winnow::token::take_while;
use winnow::{ModalResult, Parser};
pub(crate) fn query<'src>(input: &mut &'src str) -> ModalResult<Query<'src>> {
terms.map(Query).parse_next(input)
}
/// Parses one or more top-level terms separated by at least one whitespace
/// character.
///
/// Failures are labelled `"term"` for error reporting.
fn terms<'src>(input: &mut &'src str) -> ModalResult<Vec<Term<'src>>> {
    let mut term_list = separated(1.., top, multispace1).context(StrContext::Label("term"));
    term_list.parse_next(input)
}
/// Parses a single top-level term.
///
/// Alternatives are tried in this order: proximity expression, boolean
/// expression, parenthesised group, plain term, negated term.
fn top<'src>(input: &mut &'src str) -> ModalResult<Term<'src>> {
    let mut any_top_level = alt((proximity, boolean, paren_top, term, not_));
    any_top_level.parse_next(input)
}
/// Parses a parenthesised list of terms into [`Term::Grouped`].
///
/// Whitespace directly inside the parentheses is allowed. Once the opening
/// `(` and the inner terms have been read, a missing `)` is a
/// non-recoverable (`cut_err`) error labelled `"term group"`.
fn paren_top<'src>(input: &mut &'src str) -> ModalResult<Term<'src>> {
    let closing = cut_err(")")
        .context(StrContext::Label("term group"))
        .context(StrContext::Expected(StrContextValue::Description(")")));
    let padded_terms = delimited(multispace0, terms, multispace0);

    let members = delimited("(", padded_terms, closing).parse_next(input)?;
    Ok(Term::Grouped(members))
}
/// Parses a proximity expression of the form `<op>/<digits>(<args>)`.
///
/// `w` / `W` produce [`Term::Within`]; `n` / `N` produce [`Term::Near`].
/// The digits are converted with `parse_to` into the numeric type those
/// variants carry.
fn proximity<'src>(input: &mut &'src str) -> ModalResult<Term<'src>> {
    let mut expression = (
        alt(('w', 'W', 'n', 'N')),
        '/',
        digit1.parse_to(),
        paren_prox_args,
    );
    let (kind, _slash, distance, operands) = expression.parse_next(input)?;

    let parsed = match kind {
        'w' | 'W' => Term::Within(distance, operands),
        _ => Term::Near(distance, operands),
    };
    Ok(parsed)
}
/// Parses a boolean expression: an operator (`+` or `~`) followed by its
/// operand(s).
///
/// `+` yields [`Term::Must`] and `~` yields [`Term::Should`]. A single
/// operand is tried first and wrapped in a one-element vector; otherwise
/// the operands are parsed by [`bool_args`].
fn boolean<'src>(input: &mut &'src str) -> ModalResult<Term<'src>> {
    let operand_parser = alt((bool_arg.map(|single| vec![single]), bool_args));
    let (operator, operands) = (alt(('+', '~')), operand_parser).parse_next(input)?;

    Ok(match operator {
        '+' => Term::Must(operands),
        _ => Term::Should(operands),
    })
}
/// Parses one operand of a boolean operator: a proximity expression or a
/// plain term, tried in that order.
fn bool_arg<'src>(input: &mut &'src str) -> ModalResult<Term<'src>> {
    let mut operand = alt((proximity, term));
    operand.parse_next(input)
}
/// Parses the operand list of a boolean operator.
///
/// Accepts either one or more parenthesised groups ([`paren_bool_args`]) or
/// a whitespace-separated run of [`bool_arg`]s. If neither matches, the
/// failure is turned into a non-recoverable error labelled
/// `"boolean term"`.
fn bool_args<'src>(input: &mut &'src str) -> ModalResult<Vec<Term<'src>>> {
    let bare_list = separated(1.., bool_arg, multispace1);
    let grouped_or_bare = alt((paren_bool_args, bare_list));

    cut_err(grouped_or_bare)
        .context(StrContext::Label("boolean term"))
        .parse_next(input)
}
/// Parses one or more whitespace-separated parenthesised boolean operand
/// groups and flattens them into a single list.
///
/// Each group is `(`, optional whitespace, [`bool_args`], optional
/// whitespace, `)`. A missing `)` is a non-recoverable error labelled
/// `"boolean group"`.
fn paren_bool_args<'src>(input: &mut &'src str) -> ModalResult<Vec<Term<'src>>> {
    let closing = cut_err(")")
        .context(StrContext::Label("boolean group"))
        .context(StrContext::Expected(StrContextValue::Description("`)`")));
    let single_group = delimited(
        "(",
        delimited(multispace0, bool_args, multispace0),
        closing,
    );

    let groups: Vec<Vec<Term<'src>>> =
        separated(1.., single_group, multispace1).parse_next(input)?;
    // Nesting introduced by the parentheses is not preserved in the output.
    Ok(groups.into_iter().flatten().collect())
}
fn prox_args<'src>(input: &mut &'src str) -> ModalResult<Vec<Term<'src>>> {
alt((
separated(1.., alt((proximity, boolean, nonliteral)), multispace1),
separated(
1..,
cut_err(literal)
.context(StrContext::Label("proximity terms"))
.context(StrContext::Expected(StrContextValue::Description(
"another single-quoted term",
))),
multispace1,
),
))
.parse_next(input)
}
/// Parses the parenthesised argument list of a proximity expression.
///
/// Whitespace directly inside the parentheses is allowed. A missing `)` is a
/// non-recoverable error labelled `"proximity term"`.
fn paren_prox_args<'src>(input: &mut &'src str) -> ModalResult<Vec<Term<'src>>> {
    let closing = cut_err(")")
        .context(StrContext::Label("proximity term"))
        .context(StrContext::Expected(StrContextValue::Description("`)`")));
    let padded_args = delimited(multispace0, prox_args, multispace0);

    delimited("(", padded_args, closing).parse_next(input)
}
/// Parses a negated term: `!` immediately followed by a plain term, producing
/// [`Term::Not`].
///
/// A failure of the operand is labelled `"not"` together with a description
/// of what was expected.
fn not_<'src>(input: &mut &'src str) -> ModalResult<Term<'src>> {
    let operand = term
        .context(StrContext::Label("not"))
        .context(StrContext::Expected(StrContextValue::Description(
            "`'`, `\"`, letter, or number",
        )));

    let negated = preceded('!', operand).parse_next(input)?;
    Ok(Term::Not(Box::new(negated)))
}
/// Parses a plain term: a single-quoted literal or, failing that, any
/// non-literal term.
fn term<'src>(input: &mut &'src str) -> ModalResult<Term<'src>> {
    let mut plain = alt((literal, nonliteral));
    plain.parse_next(input)
}
/// Parses a non-literal term, trying wildcard, phrase and bare word in that
/// order.
fn nonliteral<'src>(input: &mut &'src str) -> ModalResult<Term<'src>> {
    let mut unquoted_or_phrase = alt((wildcard, phrase, bare));
    unquoted_or_phrase.parse_next(input)
}
/// Parses a double-quoted phrase into [`Term::Phrase`].
///
/// Straight (`"`) and curly (`“` / `”`) quotes are both accepted. After the
/// opening quote and the text, a missing closing quote is a non-recoverable
/// error labelled `"phrase"`.
fn phrase<'src>(input: &mut &'src str) -> ModalResult<Term<'src>> {
    let opening = alt(('"', '“'));
    let closing = cut_err(alt(('"', '”')))
        .context(StrContext::Label("phrase"))
        .context(StrContext::Expected(StrContextValue::Description("\"")));

    let text = delimited(opening, span, closing).parse_next(input)?;
    Ok(Term::Phrase(text))
}
/// Parses a single-quoted literal into [`Term::Literal`].
///
/// Straight (`'`) and curly (`‘` / `’`) quotes are both accepted. After the
/// opening quote and the text, a missing closing quote is a non-recoverable
/// error labelled `"literal"`.
fn literal<'src>(input: &mut &'src str) -> ModalResult<Term<'src>> {
    let opening = alt(("'", "‘"));
    let closing = cut_err(alt(("'", "’")))
        .context(StrContext::Label("literal"))
        .context(StrContext::Expected(StrContextValue::Description("'")));

    let text = delimited(opening, span, closing).parse_next(input)?;
    Ok(Term::Literal(text))
}
/// Parses the text inside a quoted term: one or more characters that are
/// alphanumeric, whitespace, or one of `$`, `%`, `.`, `,`, `-`.
fn span<'src>(input: &mut &'src str) -> ModalResult<&'src str> {
    const EXTRA_PUNCTUATION: &str = "$%.,-";
    let is_span_char = |c: char| {
        c.is_alphanumeric() || c.is_whitespace() || EXTRA_PUNCTUATION.contains(c)
    };

    take_while(1.., is_span_char).parse_next(input)
}
/// Parses a wildcard term: a word immediately followed by `*`, producing
/// [`Term::Wildcard`] with the word (the `*` is not included).
///
/// Failures are labelled `"wildcard"`.
fn wildcard<'src>(input: &mut &'src str) -> ModalResult<Term<'src>> {
    let prefix = terminated(string, '*')
        .context(StrContext::Label("wildcard"))
        .parse_next(input)?;
    Ok(Term::Wildcard(prefix))
}
fn bare<'src>(input: &mut &'src str) -> ModalResult<Term<'src>> {
string.parse_next(input).map(Term::Bare)
}
fn string<'src>(input: &mut &'src str) -> ModalResult<&'src str> {
(
take_while(1.., ('0'..='9', 'a'..='z', 'A'..='Z', '.')),
not("/"),
)
.parse_next(input)
.map(|(s, ())| s)
}
// Unit tests for the query grammar. Every test goes through `Query::parse`
// and checks either the resulting `Query` value, its `to_string()` output,
// or the rendered error message.
#[cfg(test)]
mod tests {
use super::*;
// --- Single terms -----------------------------------------------------
#[test]
fn bare() {
let mut input = "pizza";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Bare("pizza")]);
assert_eq!(result, expected);
}
#[test]
fn wildcard() {
let mut input = "pizz*";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Wildcard("pizz")]);
assert_eq!(result, expected);
}
// --- Boolean operators and plain groups ---------------------------------
#[test]
fn should_group() {
let mut input = "~(pizza pasta)";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Should(vec![
Term::Bare("pizza"),
Term::Bare("pasta"),
])]);
assert_eq!(result, expected);
}
#[test]
fn must_group() {
let mut input = "+(pizza pasta)";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Must(vec![
Term::Bare("pizza"),
Term::Bare("pasta"),
])]);
assert_eq!(result, expected);
}
#[test]
fn paren() {
let mut input = "(pizza pasta)";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Grouped(vec![
Term::Bare("pizza"),
Term::Bare("pasta"),
])]);
assert_eq!(result, expected);
}
#[test]
fn simple() {
let mut input = "pizza pasta";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Bare("pizza"), Term::Bare("pasta")]);
assert_eq!(result, expected);
}
#[test]
fn single_should() {
let mut input = "~pizza";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Should(vec![Term::Bare("pizza")])]);
assert_eq!(result, expected);
}
#[test]
fn multi_should() {
let mut input = "~pizza ~pasta";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![
Term::Should(vec![Term::Bare("pizza")]),
Term::Should(vec![Term::Bare("pasta")]),
]);
assert_eq!(result, expected);
}
// A proximity group may not mix single-quoted literals with other terms.
#[test]
fn mixed_prox_fails() {
let mut input = "n/20('dogs and cats' rats)";
let result = Query::parse(&mut input);
assert!(result.is_err());
}
#[test]
fn literal_prox_succeeds() {
let mut input = "n/20('dogs and cats' 'rats and mice')";
let result = Query::parse(&mut input);
assert!(result.is_ok());
}
#[test]
fn single_must() {
let mut input = "+pizza";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Must(vec![Term::Bare("pizza")])]);
assert_eq!(result, expected);
}
#[test]
fn multi_must() {
let mut input = "+pizza +pasta";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![
Term::Must(vec![Term::Bare("pizza")]),
Term::Must(vec![Term::Bare("pasta")]),
]);
assert_eq!(result, expected);
}
#[test]
fn single_not() {
let mut input = "!pizza";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Not(Box::new(Term::Bare("pizza")))]);
assert_eq!(result, expected);
}
#[test]
fn multi_not() {
let mut input = "!pizza !pasta";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![
Term::Not(Box::new(Term::Bare("pizza"))),
Term::Not(Box::new(Term::Bare("pasta"))),
]);
assert_eq!(result, expected);
}
// --- Quoted terms -------------------------------------------------------
#[test]
fn phrase() {
let mut input = r#""investment incentive""#;
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Phrase("investment incentive")]);
assert_eq!(result, expected);
}
#[test]
fn literal() {
let mut input = "'investment incentive'";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Literal("investment incentive")]);
assert_eq!(result, expected);
}
#[test]
fn mixed_boolean() {
let mut input = "+pizza ~pasta !mushroom";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![
Term::Must(vec![Term::Bare("pizza")]),
Term::Should(vec![Term::Bare("pasta")]),
Term::Not(Box::new(Term::Bare("mushroom"))),
]);
assert_eq!(result, expected);
}
// --- Proximity expressions ----------------------------------------------
#[test]
fn near() {
let mut input = "n/25(pesticide insect)";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Near(
25,
vec![Term::Bare("pesticide"), Term::Bare("insect")],
)]);
assert_eq!(result, expected);
}
#[test]
fn mixed_proximity_boolean_wildcard() {
let mut input = "N/10(~(pizz* past*) ~(tomat* mozzarell* arancin* crust))";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Near(
10,
vec![
Term::Should(vec![Term::Wildcard("pizz"), Term::Wildcard("past")]),
Term::Should(vec![
Term::Wildcard("tomat"),
Term::Wildcard("mozzarell"),
Term::Wildcard("arancin"),
Term::Bare("crust"),
]),
],
)]);
assert_eq!(result, expected);
}
#[test]
fn mixed_proximity_phrase() {
let mut input =
r#"~(n/20("dogs and cats" ~("rats" "mice")) n/20("dogs and cats" ~("rats" "mice")))"#;
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Should(vec![
Term::Near(
20,
vec![
Term::Phrase("dogs and cats"),
Term::Should(vec![Term::Phrase("rats"), Term::Phrase("mice")]),
],
),
Term::Near(
20,
vec![
Term::Phrase("dogs and cats"),
Term::Should(vec![Term::Phrase("rats"), Term::Phrase("mice")]),
],
),
])]);
assert_eq!(result, expected);
}
// Extra parentheses inside a boolean operand list are flattened away.
#[test]
fn mixed_proximity_phrase_extraneous_parens() {
let mut input = r#"~((n/20("dogs and cats" ~("rats" "mice"))))"#;
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Should(vec![Term::Near(
20,
vec![
Term::Phrase("dogs and cats"),
Term::Should(vec![Term::Phrase("rats"), Term::Phrase("mice")]),
],
)])]);
assert_eq!(result, expected);
}
#[test]
fn multi_mixed_proximity_phrase_extraneous_parens() {
let mut input = r#"~((n/20("dogs and cats" ~("rats" "mice"))) (n/20("dogs and cats" ~("rats" "mice"))))"#;
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Should(vec![
Term::Near(
20,
vec![
Term::Phrase("dogs and cats"),
Term::Should(vec![Term::Phrase("rats"), Term::Phrase("mice")]),
],
),
Term::Near(
20,
vec![
Term::Phrase("dogs and cats"),
Term::Should(vec![Term::Phrase("rats"), Term::Phrase("mice")]),
],
),
])]);
assert_eq!(result, expected);
}
// --- Top-level groups: nesting is preserved as `Term::Grouped` ------------
#[test]
fn nested_parens() {
let mut input = "((dogs cats))";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Grouped(vec![Term::Grouped(vec![
Term::Bare("dogs"),
Term::Bare("cats"),
])])]);
assert_eq!(result, expected);
}
#[test]
fn terms_with_group() {
let mut input = "rats mice (dogs cats)";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![
Term::Bare("rats"),
Term::Bare("mice"),
Term::Grouped(vec![Term::Bare("dogs"), Term::Bare("cats")]),
]);
assert_eq!(result, expected);
}
#[test]
fn group_with_terms() {
let mut input = "(dogs cats) rats mice";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![
Term::Grouped(vec![Term::Bare("dogs"), Term::Bare("cats")]),
Term::Bare("rats"),
Term::Bare("mice"),
]);
assert_eq!(result, expected);
}
#[test]
fn list_of_groups() {
let mut input = "(dogs cats) (rats mice)";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![
Term::Grouped(vec![Term::Bare("dogs"), Term::Bare("cats")]),
Term::Grouped(vec![Term::Bare("rats"), Term::Bare("mice")]),
]);
assert_eq!(result, expected);
}
#[test]
fn group_of_groups() {
let mut input = "((dogs cats) (rats mice))";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Grouped(vec![
Term::Grouped(vec![Term::Bare("dogs"), Term::Bare("cats")]),
Term::Grouped(vec![Term::Bare("rats"), Term::Bare("mice")]),
])]);
assert_eq!(result, expected);
}
#[test]
fn boolean_nested_parens() {
let mut input = "~((dogs cats))";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Should(vec![
Term::Bare("dogs"),
Term::Bare("cats"),
])]);
assert_eq!(result, expected);
}
#[test]
fn multi_top_level_proximity() {
let mut input = "n/5(a b) w/6(c d)";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![
Term::Near(5, vec![Term::Bare("a"), Term::Bare("b")]),
Term::Within(6, vec![Term::Bare("c"), Term::Bare("d")]),
]);
assert_eq!(result, expected);
}
// --- Inputs that must be rejected -----------------------------------------
// Boolean operators directly inside a boolean group are an error.
#[test]
fn nested_boolean() {
let mut input = "~(+a ~b)";
let result = Query::parse(&mut input);
assert!(result.is_err());
}
#[test]
fn proximity_unclosed_paren_error() {
let mut input = "n/30((~pipes ~paint) lead";
let result = Query::parse(&mut input);
assert!(result.is_err());
}
#[test]
fn should_unclosed_paren_error() {
let mut input = "~(pipes paint";
let result = Query::parse(&mut input);
assert!(result.is_err());
}
#[test]
fn invalid_proximity() {
let mut input = "n/30('pipes' paint)";
let result = Query::parse(&mut input);
assert!(result.is_err());
}
// Repeated terms are kept as written; nothing is de-duplicated.
#[test]
fn redundant_term_with_should() {
let mut input = "a ~b a";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![
Term::Bare("a"),
Term::Should(vec![Term::Bare("b")]),
Term::Bare("a"),
]);
assert_eq!(result, expected);
}
#[test]
fn leading_should() {
let mut input = "~b a";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Should(vec![Term::Bare("b")]), Term::Bare("a")]);
assert_eq!(result, expected);
}
#[test]
fn extraneous_parens_with_should() {
let mut input = "(~a)";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Grouped(vec![Term::Should(vec![Term::Bare(
"a",
)])])]);
assert_eq!(result, expected);
}
// `!` accepts only a single plain term: no nesting and no groups.
#[test]
fn double_negative_is_err() {
let mut input = "!!pizza";
let result = Query::parse(&mut input);
assert!(result.is_err());
}
#[test]
fn negated_group_is_err() {
let mut input = "!(pizza)";
let result = Query::parse(&mut input);
assert!(result.is_err());
}
#[test]
fn wildcard_prefix_is_err() {
let mut input = "*izza";
let result = Query::parse(&mut input);
assert!(result.is_err());
}
// --- Punctuation and smart quotes -----------------------------------------
#[test]
fn literal_queries_with_punctuation() {
let mut input = "'50% of the total amount appropriated'";
let result = Query::parse(&mut input);
assert!(result.is_ok());
let mut input = "'42 U.S.C. 651'";
let result = Query::parse(&mut input);
assert!(result.is_ok());
let mut input = "'availability, access, and utilization'";
let result = Query::parse(&mut input);
assert!(result.is_ok());
let mut input = "'audio-only'";
let result = Query::parse(&mut input);
assert!(result.is_ok());
}
// Curly quotes are accepted on input; the rendered query uses straight quotes.
#[test]
fn phrase_with_smart_quotes() {
let mut input = "“investment incentives”";
let result = Query::parse(&mut input).unwrap().to_string();
let expected = "\"investment incentives\"";
assert_eq!(result, expected);
}
#[test]
fn literal_with_smart_quotes() {
let mut input = "‘50% of the total amount appropriated’";
let result = Query::parse(&mut input).unwrap().to_string();
let expected = "'50% of the total amount appropriated'";
assert_eq!(result, expected);
}
// --- Whitespace tolerance inside parentheses -------------------------------
#[test]
fn trailing_space_in_boolean_groups() {
let mut input = "~(pregan* matern* )";
let result = Query::parse(&mut input);
assert!(result.is_ok());
}
#[test]
fn leading_and_trailing_space_in_boolean_groups() {
let mut input = "~( pregan* matern* )";
let result = Query::parse(&mut input);
assert!(result.is_ok());
}
#[test]
fn multi_top_level_proximity_with_spaces() {
let mut input = "n/5( a b ) w/6( c d )";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![
Term::Near(5, vec![Term::Bare("a"), Term::Bare("b")]),
Term::Within(6, vec![Term::Bare("c"), Term::Bare("d")]),
]);
assert_eq!(result, expected);
}
#[test]
fn group_of_groups_extra_spaces() {
let mut input = "( ( dogs cats ) ( rats mice ) )";
let result = Query::parse(&mut input).unwrap();
let expected = Query(vec![Term::Grouped(vec![
Term::Grouped(vec![Term::Bare("dogs"), Term::Bare("cats")]),
Term::Grouped(vec![Term::Bare("rats"), Term::Bare("mice")]),
])]);
assert_eq!(result, expected);
}
#[test]
fn punctuation_in_proximity_query() {
let input = "n/30(42 U.S.C. 4321 et seq. ~(a b c d))";
let result = Query::parse(input);
assert!(result.is_ok())
}
// --- Rendered error messages ------------------------------------------------
// Each expected string is compared verbatim against the error's `to_string()`
// output, so the contents of these multi-line literals are
// whitespace-sensitive.
#[test]
fn unclosed_paren_err_message() {
let input = "(hello";
let result = Query::parse(input).unwrap_err();
let error = "\
(hello
^
invalid term group
expected )";
assert_eq!(result.to_string(), error);
}
#[test]
fn unclosed_bool_group_err_message() {
let input = "+(hello";
let result = Query::parse(input).unwrap_err();
let error = "\
+(hello
^
invalid boolean group
expected `)`";
assert_eq!(result.to_string(), error);
}
#[test]
fn unclosed_literal_err_message() {
let input = "'hello";
let result = Query::parse(input).unwrap_err();
let error = "\
'hello
^
invalid literal
expected '";
assert_eq!(result.to_string(), error);
}
#[test]
fn unclosed_phrase_err_message() {
let input = "\"hello";
let result = Query::parse(input).unwrap_err();
let error = "\
\"hello
^
invalid phrase
expected \"";
assert_eq!(result.to_string(), error);
}
#[test]
fn bool_op_nonsense_err_message() {
let input = "+~";
let result = Query::parse(input).unwrap_err();
assert_eq!(
result.to_string(),
"\
+~
^
invalid boolean term"
);
}
#[test]
fn invalid_not_group_err_message() {
let input = "!(hello world)";
let result = Query::parse(input).unwrap_err();
let error = "\
!(hello world)
^
invalid not
expected `'`, `\"`, letter, or number";
assert_eq!(result.to_string(), error);
}
#[test]
fn invalid_proximity_error_message() {
let input = "n/5('hello' world)";
let result = Query::parse(input).unwrap_err();
let error = "\
n/5('hello' world)
^
invalid proximity terms
expected another single-quoted term";
assert_eq!(result.to_string(), error);
}
#[test]
fn invalid_proximity_unclosed_paren_error_message() {
let input = "n/5(hello world";
let result = Query::parse(input).unwrap_err();
let error = "\
n/5(hello world
^
invalid proximity term
expected `)`";
assert_eq!(result.to_string(), error);
}
#[test]
fn invalid_asterisk_in_bare_term_error_message() {
let result = Query::parse("pi*zza").unwrap_err();
let error = "\
pi*zza
^
invalid term";
assert_eq!(result.to_string(), error)
}
#[test]
fn invalid_char_in_bare_term_error_message() {
let result = Query::parse("pi$zza").unwrap_err();
let error = "\
pi$zza
^
invalid term";
assert_eq!(result.to_string(), error)
}
}