use regex::Regex;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::sync::LazyLock;
use super::grammar::{DEFAULT_FIELD, unescape};
/// Relational operator used when comparing an attribute against a value.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Comparison {
    /// Strictly greater than (`>`).
    Gt,
    /// Strictly less than (`<`).
    Lt,
    /// Greater than or equal to (`>=`).
    Gte,
    /// Less than or equal to (`<=`).
    Lte,
}

impl Comparison {
    /// Returns the operator symbol used when rendering a Lucene query string.
    pub fn as_lucene(&self) -> String {
        let symbol = match *self {
            Self::Gt => ">",
            Self::Lt => "<",
            Self::Gte => ">=",
            Self::Lte => "<=",
        };
        symbol.to_owned()
    }
}
/// A value that an attribute can be compared against or bounded by.
#[derive(Clone, Debug, PartialEq)]
pub enum ComparisonValue {
    /// No bound; displayed as `*`.
    Unbounded,
    /// Arbitrary text.
    String(String),
    /// A signed 64-bit integer.
    Integer(i64),
    /// A 64-bit floating point number.
    Float(f64),
}

impl std::fmt::Display for ComparisonValue {
    /// Writes the raw (unescaped) value; `Unbounded` is written as `*`.
    ///
    /// Width, fill and precision flags on the caller's format spec are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Unbounded => f.write_str("*"),
            Self::String(text) => f.write_str(text),
            Self::Integer(int) => write!(f, "{int}"),
            Self::Float(float) => write!(f, "{float}"),
        }
    }
}
impl ComparisonValue {
    /// Renders the value for inclusion in a Lucene query string.
    ///
    /// String values are passed through `QueryNode::lucene_escape`; the remaining
    /// variants need no escaping and are rendered exactly as `Display` writes them.
    pub fn to_lucene(&self) -> String {
        match self {
            Self::String(text) => QueryNode::lucene_escape(text),
            Self::Unbounded | Self::Integer(_) | Self::Float(_) => self.to_string(),
        }
    }
}
impl<T: AsRef<str>> From<T> for ComparisonValue {
    /// Classifies raw query text as a comparison value.
    ///
    /// The text is first passed through `unescape` and then `escape_quotes`
    /// (which drops one pair of surrounding double quotes). The result is
    /// classified in this order:
    ///
    /// 1. exactly `*` becomes [`ComparisonValue::Unbounded`];
    /// 2. anything that parses as `i64` becomes [`ComparisonValue::Integer`];
    /// 3. anything that parses as a *finite* `f64` becomes [`ComparisonValue::Float`];
    /// 4. everything else is kept as [`ComparisonValue::String`].
    fn from(s: T) -> Self {
        let v = escape_quotes(unescape(s.as_ref()));
        if v == "*" {
            return ComparisonValue::Unbounded;
        }
        if let Ok(int) = v.parse::<i64>() {
            return ComparisonValue::Integer(int);
        }
        match v.parse::<f64>() {
            // `f64::from_str` also accepts "inf", "infinity", "nan" and maps
            // overflowing literals such as "1e999" to infinity. Those would be
            // re-rendered as `inf`/`NaN` (losing the original text) and NaN would
            // break `PartialEq`, so only finite numbers are treated as floats.
            Ok(float) if float.is_finite() => ComparisonValue::Float(float),
            _ => ComparisonValue::String(v),
        }
    }
}
/// Either a comparison operator or a comparison value.
///
/// NOTE(review): nothing in this part of the file constructs or consumes this
/// type; presumably it is an intermediate used by the query parser — confirm.
#[derive(Clone, Debug, PartialEq)]
pub enum Range {
/// A relational operator.
Comparison(Comparison),
/// A value to compare against.
Value(ComparisonValue),
}
/// The connective that joins the children of a `QueryNode::Boolean`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum BooleanType {
/// Children are joined with ` AND ` when rendered.
And,
/// Children are joined with ` OR ` when rendered.
Or,
}
/// A node in a parsed search query tree.
///
/// Each variant can be rendered back to a Lucene-style string with `to_lucene`.
/// For the attribute variants, the `attr:` prefix is omitted from the rendered
/// string when `attr` equals `DEFAULT_FIELD`.
#[derive(Clone, Debug, PartialEq)]
pub enum QueryNode {
/// Matches every document; rendered as `*:*`.
MatchAllDocs,
/// Matches no document; rendered as `-*:*`.
MatchNoDocs,
/// Rendered as `_exists_:<attr>`.
AttributeExists { attr: String },
/// Rendered as `_missing_:<attr>`.
AttributeMissing { attr: String },
/// A range over an attribute, rendered as `<lower> TO <upper>` enclosed in
/// `[`/`]` on an inclusive side and `{`/`}` on an exclusive side.
AttributeRange {
attr: String,
lower: ComparisonValue,
lower_inclusive: bool,
upper: ComparisonValue,
upper_inclusive: bool,
},
/// A single-sided comparison, rendered as the operator symbol followed by the value.
AttributeComparison {
attr: String,
comparator: Comparison,
value: ComparisonValue,
},
/// A bare term; the value is escaped with `lucene_escape` when rendered.
AttributeTerm { attr: String, value: String },
/// A phrase rendered inside double quotes, with `"` and `\` backslash-escaped.
QuotedAttribute { attr: String, phrase: String },
/// A prefix match; rendered as the escaped prefix followed by `*`.
AttributePrefix { attr: String, prefix: String },
/// A wildcard pattern; rendered verbatim, without any escaping.
AttributeWildcard { attr: String, wildcard: String },
/// Negation of the wrapped node; rendered with a leading `NOT`.
NegatedNode { node: Box<QueryNode> },
/// A group of nodes joined by `oper`. An empty `And` group renders as `*:*`
/// and an empty `Or` group as `-*:*`.
Boolean {
oper: BooleanType,
nodes: Vec<QueryNode>,
},
}
impl QueryNode {
pub fn to_lucene(&self) -> String {
match self {
QueryNode::MatchAllDocs => String::from("*:*"),
QueryNode::MatchNoDocs => String::from("-*:*"),
QueryNode::AttributeExists { attr } => format!("_exists_:{attr}"),
QueryNode::AttributeMissing { attr } => format!("_missing_:{attr}"),
QueryNode::AttributeRange {
attr,
lower,
lower_inclusive,
upper,
upper_inclusive,
} => {
let lower_bracket = if *lower_inclusive { "[" } else { "{" };
let upper_bracket = if *upper_inclusive { "]" } else { "}" };
Self::is_default_attr(attr)
+ &format!(
"{}{} TO {}{}",
lower_bracket,
lower.to_lucene(),
upper.to_lucene(),
upper_bracket
)
}
QueryNode::AttributeComparison {
attr,
comparator,
value,
} => {
Self::is_default_attr(attr)
+ &format!("{}{}", comparator.as_lucene(), value.to_lucene())
}
QueryNode::AttributeTerm { attr, value } => {
Self::is_default_attr(attr) + &Self::lucene_escape(value)
}
QueryNode::QuotedAttribute { attr, phrase } => {
Self::is_default_attr(attr) + &format!("\"{}\"", &Self::quoted_escape(phrase))
}
QueryNode::AttributePrefix { attr, prefix } => {
Self::is_default_attr(attr) + &format!("{}*", &Self::lucene_escape(prefix))
}
QueryNode::AttributeWildcard { attr, wildcard } => {
Self::is_default_attr(attr) + wildcard
}
QueryNode::NegatedNode { node } => {
if matches!(
**node,
QueryNode::NegatedNode { .. } | QueryNode::Boolean { .. }
) {
format!("NOT ({})", node.to_lucene())
} else {
format!("NOT {}", node.to_lucene())
}
}
QueryNode::Boolean {
oper: BooleanType::And,
nodes,
..
} => {
if nodes.is_empty() {
return String::from("*:*");
}
let mut output = String::new();
for n in nodes {
if !output.is_empty() {
output.push_str(" AND ");
}
if let QueryNode::NegatedNode { node } = n {
output.push_str("NOT ");
let qstr = if let QueryNode::Boolean { .. } = **node {
format!("({})", node.to_lucene())
} else {
node.to_lucene()
};
output.push_str(&qstr);
} else {
let qstr = if let QueryNode::Boolean { .. } = n {
format!("({})", n.to_lucene())
} else {
n.to_lucene()
};
output.push_str(&qstr);
}
}
output
}
QueryNode::Boolean {
oper: BooleanType::Or,
nodes,
..
} => {
if nodes.is_empty() {
return String::from("-*:*");
}
let mut output = String::new();
for n in nodes {
if !output.is_empty() {
output.push_str(" OR ");
}
let qstr = if let QueryNode::Boolean { .. } = n {
format!("({})", n.to_lucene())
} else {
n.to_lucene()
};
output.push_str(&qstr);
}
output
}
}
}
/// Returns `input` with a backslash inserted before each character that has
/// syntactic meaning in a query term.
///
/// The escaped characters are `: + - = > < ! ( ) { } [ ] ^ " ~ * ? \ /`; every
/// other character (including whitespace) is copied through unchanged.
pub fn lucene_escape(input: &str) -> String {
    // Every character that must be preceded by a backslash.
    const SPECIAL: &str = ":+-=><!(){}[]^\"~*?\\/";

    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        if SPECIAL.contains(ch) {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
/// Escapes text that will be placed between double quotes: only `"` and `\`
/// receive a leading backslash, everything else is copied through unchanged.
fn quoted_escape(input: &str) -> String {
    input
        .chars()
        .fold(String::with_capacity(input.len()), |mut escaped, ch| {
            if ch == '"' || ch == '\\' {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
/// Builds the field prefix placed in front of a rendered value.
///
/// Returns an empty string when `attr` equals `DEFAULT_FIELD`, otherwise the
/// attribute name followed by a colon.
fn is_default_attr(attr: &str) -> String {
    let mut prefix = String::new();
    if attr != DEFAULT_FIELD {
        prefix.push_str(attr);
        prefix.push(':');
    }
    prefix
}
/// Combines `nodes` under the given conjunction.
///
/// A list holding exactly one node is collapsed to that node rather than being
/// wrapped; any other length (including zero) produces a `QueryNode::Boolean`.
pub fn new_boolean(conjunction: BooleanType, mut nodes: Vec<QueryNode>) -> QueryNode {
    match nodes.len() {
        1 => nodes.pop().expect("Known to have length 1"),
        _ => QueryNode::Boolean {
            oper: conjunction,
            nodes,
        },
    }
}
}
impl<'de> Deserialize<'de> for QueryNode {
fn deserialize<D>(deserializer: D) -> Result<QueryNode, D::Error>
where
D: Deserializer<'de>,
{
use serde::de::Error;
let s = String::deserialize(deserializer)?;
s.parse::<QueryNode>()
.map_err(|e| D::Error::custom(e.to_string()))
}
}
impl Serialize for QueryNode {
    /// Serializes the node as a single string containing its Lucene rendering.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let rendered = self.to_lucene();
        serializer.serialize_str(&rendered)
    }
}
/// Matches a value that is entirely wrapped in one pair of double quotes and
/// captures the (non-empty, newline-free) text between them.
static ESCAPE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new("^\"(.+)\"$").unwrap());

/// Removes one pair of surrounding double quotes from `value`.
///
/// Values that do not match `ESCAPE_RE` are returned unchanged: not quoted on
/// both ends, nothing between the quotes, or a newline between the quotes.
fn escape_quotes<T: AsRef<str>>(value: T) -> String {
    let text = value.as_ref();
    // The pattern is anchored at both ends, so a match always spans the whole
    // input and the result is exactly the captured interior.
    match ESCAPE_RE.captures(text) {
        Some(caps) => caps[1].to_owned(),
        None => text.to_owned(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A node serializes to a JSON string holding its Lucene rendering.
    #[test]
    fn query_node_serializes_to_string() {
        let node = QueryNode::AttributeExists {
            attr: String::from("something"),
        };
        let json = serde_json::to_string(&node).unwrap();
        assert_eq!(json, r#""_exists_:something""#);
    }

    /// A JSON string holding a query is parsed into the matching node.
    #[test]
    fn query_node_deserializes_from_string() {
        let parsed: QueryNode =
            serde_json::from_str(r#""_missing_:something_else""#).unwrap();
        let expected = QueryNode::AttributeMissing {
            attr: String::from("something_else"),
        };
        assert_eq!(parsed, expected);
    }
}