lucene-query-syntax 0.1.1

Parses a subset of the Apache Lucene query syntax
Documentation
use std::str::FromStr;

use pom::{self, char_class::*, parser::*};

use crate::ast::{Boundary, BoundaryKind, Operator, Term, Value};

/// Consumes a possibly empty run of spaces, tabs, carriage returns and line
/// feeds, discarding what was matched.
fn space<'a>() -> Parser<'a, u8, ()> {
    let blank = one_of(b" \t\r\n");
    blank.repeat(0..).discard()
}

/// Parses a double-quoted phrase and returns its unescaped contents.
///
/// Inside the quotes a backslash may be followed by `\`, `"`, `n` (line feed)
/// or `t` (tab); any other byte except `\` and `"` is taken literally. The
/// collected bytes must be valid UTF-8. An empty phrase (`""`) yields an empty
/// string.
fn phrase<'a>() -> Parser<'a, u8, String> {
    // The byte each recognised escape letter stands for.
    let escaped = sym(b'\\') | sym(b'"') | sym(b'n').map(|_| b'\n') | sym(b't').map(|_| b'\t');
    let escape_sequence = sym(b'\\') * escaped;
    let plain = none_of(b"\\\"");

    // A non-empty run of literal/escaped bytes, decoded as UTF-8.
    let chunk = (plain | escape_sequence)
        .repeat(1..)
        .convert(String::from_utf8);

    // Zero chunks is allowed so that `""` parses to the empty string.
    let body = chunk
        .repeat(0..)
        .map(|chunks: Vec<String>| chunks.concat());

    sym(b'"') * body - sym(b'"')
}

/// Parses an unquoted term: one or more ASCII alphanumerics, `-`, `_` or `.`,
/// returned as an owned `String`.
fn single_term<'a>() -> Parser<'a, u8, String> {
    let term_byte = is_a(alphanum) | one_of(b"-_.");
    let raw = term_byte.repeat(1..).collect();
    raw.convert(|bytes| String::from_utf8(bytes.to_vec()))
}

/// Parses a piece of text: a quoted phrase if possible, otherwise a bare term.
fn text<'a>() -> Parser<'a, u8, String> {
    let quoted = phrase();
    let bare = single_term();
    quoted | bare
}

/// Parses a run of ASCII digits as an `i64`.
///
/// No sign is accepted. An empty run or a value that does not fit in `i64`
/// makes the numeric conversion, and therefore the parser, fail.
fn integer<'a>() -> Parser<'a, u8, i64> {
    let digits = is_a(digit).repeat(0..);
    let literal = digits.convert(|bytes: Vec<u8>| String::from_utf8(bytes));
    literal.convert(|text: String| text.parse::<i64>())
}

/// Parses the literal `true` or `false` into the corresponding `bool`.
fn boolean<'a>() -> Parser<'a, u8, bool> {
    let yes = seq(b"true").map(|_| true);
    let no = seq(b"false").map(|_| false);
    yes | no
}

/// Parses an integer range such as `[1 TO 5]` or `{1 TO 5}` into `Value::Range`.
///
/// `[` / `]` mark an inclusive boundary and `{` / `}` an exclusive one; the two
/// ends may use different kinds. Whitespace is allowed after the opening
/// bracket and around `TO`, but not before the closing bracket.
fn range<'a>() -> Parser<'a, u8, Value> {
    let open =
        sym(b'[').map(|_| BoundaryKind::Inclusive) | sym(b'{').map(|_| BoundaryKind::Exclusive);
    let close =
        sym(b']').map(|_| BoundaryKind::Inclusive) | sym(b'}').map(|_| BoundaryKind::Exclusive);

    let lower = space() * integer();
    let upper = space() * seq(b"TO") * space() * integer();

    (open + lower + upper + close).map(|(((lower_kind, lower_value), upper_value), upper_kind)| {
        let from = Boundary {
            value: lower_value,
            kind: lower_kind,
        };
        let to = Boundary {
            value: upper_value,
            kind: upper_kind,
        };
        Value::Range(from, to)
    })
}

fn value<'a>() -> Parser<'a, u8, Value> {
    range() | boolean().map(Value::from) | integer().map(Value::from) | text().map(Value::from)
}

fn operator<'a>() -> Parser<'a, u8, Operator> {
    let core = (seq(b"AND") | seq(b"&&")).map(|_| Operator::And)
        | (seq(b"OR") | seq(b"||")).map(|_| Operator::Or)
        | space().map(|_| Operator::Or);

    space() * core - space()
}

/// Parses `name:value` into a `Term::Field`. The name is a bare term and no
/// whitespace is allowed on either side of the colon.
fn field<'a>() -> Parser<'a, u8, Term> {
    let name = single_term() - sym(b':');
    (name + value()).map(|(field, value)| Term::Field { field, value })
}

/// Parses text that has no field prefix and wraps it via `Term::new_default`.
fn default<'a>() -> Parser<'a, u8, Term> {
    text().map(|raw| Term::new_default(Value::from(raw)))
}

/// Parses a single term: a `field:value` pair if possible, otherwise a
/// default (unfielded) term.
fn term<'a>() -> Parser<'a, u8, Term> {
    let fielded = field();
    let unfielded = default();
    fielded | unfielded
}

/// Parses an operand followed by any number of `operator expr` pairs and
/// folds them, left to right, into nested `Term::Combine` nodes. Leading and
/// trailing whitespace is consumed.
///
/// The tail may be empty, in which case the operand is returned unchanged.
/// Requiring at least one pair would make this parser fail on a lone operand
/// only after that operand had been fully parsed, forcing `expr` to parse it a
/// second time through its fallback. With nested parentheses that doubling
/// compounds at every level, so parse time grows exponentially with nesting
/// depth.
fn many<'a>() -> Parser<'a, u8, Term> {
    let head = space() * call(partial_expr);
    let tail = (operator() + call(expr)).repeat(0..);

    (head + tail - space()).map(|(head, tail)| {
        tail.into_iter()
            .fold(head, |left, (operator, right)| Term::Combine {
                left: Box::new(left),
                right: Box::new(right),
                operator,
            })
    })
}

/// Parses a parenthesised expression, allowing whitespace just inside both
/// parentheses, and yields the inner expression's `Term`.
fn group<'a>() -> Parser<'a, u8, Term> {
    let opening = sym(b'(') * space();
    let closing = space() - sym(b')');
    opening * call(expr) - closing
}

/// Parses `-` immediately followed by a group or a term and wraps the boxed
/// operand via `Term::new_negate`.
fn negated<'a>() -> Parser<'a, u8, Term> {
    let operand = group() | term();
    (sym(b'-') * operand).map(|inner| Term::new_negate(Box::new(inner)))
}

/// Parses a single operand: a negation, a parenthesised group, or a term,
/// tried in that order.
fn partial_expr<'a>() -> Parser<'a, u8, Term> {
    let negation = negated();
    let grouping = group();
    let plain = term();
    negation | grouping | plain
}

/// Parses an expression: operands combined by operators when `many` matches,
/// otherwise a single operand.
fn expr<'a>() -> Parser<'a, u8, Term> {
    let combined = many();
    combined | call(partial_expr)
}

/// Builds the top-level parser: one expression, optionally surrounded by
/// whitespace, that must extend to the end of the input.
pub fn query<'a>() -> Parser<'a, u8, Term> {
    let body = space() * call(expr);
    body - space() - end()
}