use super::ast;
use boolean_expression::Expr;
use std::rc::Rc;
use crate::annis::operator::EdgeAnnoSearchSpec;
use crate::annis::db::exec::nodesearch::NodeSearchSpec;
use crate::annis::db::aql::operators::{
AritySpec,
OverlapSpec,
IdenticalCoverageSpec,
PrecedenceSpec,
NearSpec,
DominanceSpec,
PointingSpec,
PartOfSubCorpusSpec,
InclusionSpec,
LeftAlignmentSpec,
RightAlignmentSpec,
IdenticalNodeSpec,
};
use self::ast::RangeSpec;
grammar;
// Lexer configuration. The terminals of the first group are fixed keywords
// and operator names. They are listed before the `else` group so that they
// win whenever one of the regular expressions below could match the same
// input (e.g. `tok` and `_o_` are also valid IDs).
match {
"tok" => TOK,
"node" => NODE,
"_=_" => IDENT_COV,
"_ident_" => IDENT_NODE,
"_o_" => OVERLAP,
"_i_" => INCLUSION,
"_l_" => LEFT_ALIGNED,
"_r_" => RIGHT_ALIGNED,
} else {
// identifier: starts with a letter, `_` or `%`; may continue with digits and `-`
r"[a-zA-Z_%][a-zA-Z0-9_\-%]*" => ID,
// reference to a node by its number, e.g. `#1`
r##"#[0-9]+"## => NODE_REF,
// reference to a node by its variable name, e.g. `#abc`
r##"#[a-zA-Z][a-zA-Z0-9]*"## => VARIABLE_NODE_REF,
// definition of a variable name in front of a node search, e.g. `abc#`
r##"[a-zA-Z][a-zA-Z0-9]*#"## => VARIABLE_DEF,
// unsigned decimal number, used by the range specifications
r"[0-9]+" => DIGITS,
// all remaining terminals that are written directly inside the grammar rules
_
}
/// Public entry rule of the grammar: one or more conjunctions separated by `|`.
/// The alternatives are combined from left to right into nested OR expressions.
pub Disjunction : ast::Expr = {
    <first:Conjunction> <rest:("|" Conjunction)*> => {
        rest.into_iter()
            .fold(first, |combined, (_, alternative)| ast::Expr::or(combined, alternative))
    },
}
/// One or more factors separated by `&`.
/// The factors are combined from left to right into nested AND expressions.
Conjunction : ast::Expr = {
    <first:Factor> <rest:("&" Factor)*> => {
        rest.into_iter()
            .fold(first, |combined, (_, factor)| ast::Expr::and(combined, factor))
    },
}
/// Operand of a conjunction: either a single literal or a disjunction in parentheses.
Factor : ast::Expr = {
    <literal:Literal> => literal,
    "(" <inner:Disjunction> ")" => inner,
}
/// A single term of the query: a node search, a chain of binary operators,
/// a chain of value comparisons between node references, a unary operator
/// or a legacy meta data search.
Literal : ast::Expr = {
    // any node annotation search, optionally prefixed by a variable definition
    // like `abc#` and optionally followed by `?`
    <start: @L> <var:(VARIABLE_DEF)?> <spec:NodeSearch> <optional:"?"?> <end: @R> => {
        // the token ends with the hash sign which is not part of the variable name
        let variable = var.map(|def| def[..def.len() - 1].to_string());
        Expr::Terminal(ast::Literal::NodeSearch {
            pos: Some(ast::Pos { start, end }),
            spec,
            optional: optional.is_some(),
            variable,
        })
    },
    // binary operator, possibly chained like `a . b . c`, each operator can be negated with `!`
    <start: @L> <lhs:Operand> <neg:"!"?> <op:BinaryOpSpec> <rhs:Operand> <tail:("!"? BinaryOpSpec Operand)*> <end: @R> => {
        // TODO: can we get the position for each individual binary operator?
        let pos = ast::Pos { start, end };
        let mut chained = Expr::Terminal(ast::Literal::BinaryOp {
            lhs,
            op,
            rhs: rhs.clone(),
            pos: Some(pos.clone()),
            negated: neg.is_some(),
        });
        // every further operator of the chain is added as an AND term and uses
        // the right operand of its predecessor as left operand
        let mut previous = rhs;
        for (negation, operator, operand) in tail {
            let link = ast::Literal::BinaryOp {
                lhs: previous,
                op: operator,
                rhs: operand.clone(),
                pos: Some(pos.clone()),
                negated: negation.is_some(),
            };
            chained = ast::Expr::and(chained, Expr::Terminal(link));
            previous = operand;
        }
        chained
    },
    // binary filter between node references (and not any operands), can be chained as well
    <start: @L> <lhs:NodeRef> <filter:FilterOpSpec> <rhs:NodeRef> <tail:(FilterOpSpec NodeRef)*> <end: @R> => {
        // TODO: can we get the position for each individual binary operator?
        let pos = ast::Pos { start, end };
        let mut chained = Expr::Terminal(ast::Literal::BinaryOp {
            lhs: ast::Operand::NodeRef(lhs),
            op: filter,
            rhs: ast::Operand::NodeRef(rhs.clone()),
            pos: Some(pos.clone()),
            negated: false,
        });
        // every further filter of the chain is added as an AND term and uses
        // the right node reference of its predecessor as left operand
        let mut previous = rhs;
        for (comparison, node_ref) in tail {
            let link = ast::Literal::BinaryOp {
                lhs: ast::Operand::NodeRef(previous),
                op: comparison,
                rhs: ast::Operand::NodeRef(node_ref.clone()),
                pos: Some(pos.clone()),
                negated: false,
            };
            chained = ast::Expr::and(chained, Expr::Terminal(link));
            previous = node_ref;
        }
        chained
    },
    // unary operator applied to a node reference
    <start: @L> <node_ref:NodeRef> <op:UnaryOpSpec> <end: @R> => {
        Expr::Terminal(ast::Literal::UnaryOp {
            node_ref,
            op,
            pos: Some(ast::Pos { start, end }),
        })
    },
    // legacy meta-data query `meta::doc="..."`
    <start: @L> "meta::" <qname:QName> <cmp:ComparisonOperator> <text:TextSearch> <end: @R> => {
        let pos = ast::Pos { start, end };
        let ast::QName(ns, name) = qname;
        let ast::TextSearch(val, match_type) = text;
        let spec = match (match_type, cmp) {
            (ast::StringMatchType::Exact, ast::ComparisonOperator::Equal) => {
                NodeSearchSpec::ExactValue { ns, name, val: Some(val), is_meta: true }
            }
            (ast::StringMatchType::Exact, ast::ComparisonOperator::NotEqual) => {
                NodeSearchSpec::NotExactValue { ns, name, val, is_meta: true }
            }
            (ast::StringMatchType::Regex, ast::ComparisonOperator::Equal) => {
                NodeSearchSpec::RegexValue { ns, name, val, is_meta: true }
            }
            (ast::StringMatchType::Regex, ast::ComparisonOperator::NotEqual) => {
                NodeSearchSpec::NotRegexValue { ns, name, val, is_meta: true }
            }
        };
        Expr::Terminal(ast::Literal::LegacyMetaSearch { spec, pos })
    },
}
/// Operand of a binary operator: a reference to a node or an inline node search.
Operand : ast::Operand = {
    <node_ref:NodeRef> => ast::Operand::NodeRef(node_ref),
    // inline node search, optionally prefixed by a variable definition like `abc#`
    // and optionally followed by `?`
    <start:@L> <var:(VARIABLE_DEF)?> <spec:NodeSearch> <optional:"?"?> <end:@R> => {
        ast::Operand::Literal {
            spec: Rc::from(spec),
            pos: ast::Pos { start, end },
            // the token ends with the hash sign which is not part of the variable name
            variable: var.map(|def| def[..def.len() - 1].to_string()),
            optional: optional.is_some(),
        }
    },
}
/// General search for annotation nodes
NodeSearch : NodeSearchSpec = {
    // `node` matches any node
    NODE => NodeSearchSpec::AnyNode,
    // `tok` matches any token
    TOK => NodeSearchSpec::AnyToken,
    // searching for a token value specifically on the minimal "tok" layer, e.g. tok="abc"
    TOK <cmp:ComparisonOperator> <text:TextSearch> => {
        let ast::TextSearch(val, match_type) = text;
        match (match_type, cmp) {
            (ast::StringMatchType::Exact, ast::ComparisonOperator::Equal) => {
                NodeSearchSpec::ExactTokenValue { val, leafs_only: true }
            }
            (ast::StringMatchType::Exact, ast::ComparisonOperator::NotEqual) => {
                NodeSearchSpec::NotExactTokenValue { val }
            }
            (ast::StringMatchType::Regex, ast::ComparisonOperator::Equal) => {
                NodeSearchSpec::RegexTokenValue { val, leafs_only: true }
            }
            (ast::StringMatchType::Regex, ast::ComparisonOperator::NotEqual) => {
                NodeSearchSpec::NotRegexTokenValue { val }
            }
        }
    },
    // searching for a span value on any tokenization/segmentation layer, e.g. "abc"
    <text:TextSearch> => {
        let ast::TextSearch(val, match_type) = text;
        match match_type {
            ast::StringMatchType::Exact => {
                NodeSearchSpec::ExactTokenValue { val, leafs_only: false }
            }
            ast::StringMatchType::Regex => {
                NodeSearchSpec::RegexTokenValue { val, leafs_only: false }
            }
        }
    },
    // named annotation search with value, e.g. pos="NN"
    <qname:QName> <cmp:ComparisonOperator> <text:TextSearch> => {
        let ast::QName(ns, name) = qname;
        let ast::TextSearch(val, match_type) = text;
        match (match_type, cmp) {
            (ast::StringMatchType::Exact, ast::ComparisonOperator::Equal) => {
                NodeSearchSpec::ExactValue { ns, name, val: Some(val), is_meta: false }
            }
            (ast::StringMatchType::Exact, ast::ComparisonOperator::NotEqual) => {
                NodeSearchSpec::NotExactValue { ns, name, val, is_meta: false }
            }
            (ast::StringMatchType::Regex, ast::ComparisonOperator::Equal) => {
                NodeSearchSpec::RegexValue { ns, name, val, is_meta: false }
            }
            (ast::StringMatchType::Regex, ast::ComparisonOperator::NotEqual) => {
                NodeSearchSpec::NotRegexValue { ns, name, val, is_meta: false }
            }
        }
    },
    // search for annotation name without value, e.g. pos
    <qname:QName> => {
        let ast::QName(ns, name) = qname;
        NodeSearchSpec::ExactValue { ns, name, val: None, is_meta: false }
    },
}
/// Node references like `#1` (using the index of the node) or `#abc`
/// (using explicit name for variable)
NodeRef : ast::NodeRef = {
    // The NODE_REF token only guarantees a `#` followed by decimal digits, not
    // that the number fits into `usize`. An index that is too large is
    // therefore reported as a user parse error instead of panicking on the
    // query text. This grammar declares no custom error type, so the user
    // error is a static string.
    <v:NODE_REF> =>? v[1..]
        .parse::<usize>()
        .map(ast::NodeRef::ID)
        .map_err(|_| ::lalrpop_util::ParseError::User {
            error: "node reference index is too large",
        }),
    // named reference: everything after the leading `#` is the variable name
    <v:VARIABLE_NODE_REF> => ast::NodeRef::Name(v[1..].to_string()),
}
/// Binary operators that take a LHS and RHS as argument, e.g. `#1 ->dep #2`
///
/// Every operator that accepts a range falls back to a distance of exactly 1
/// when no range is given.
BinaryOpSpec : ast::BinaryOpSpec = {
    // Dominance (direct edge annotation), the optional name follows the `>`
    <type_def:r">([a-zA-Z_%][a-zA-Z0-9_\-%]*)?"> <anno:EdgeAnno> => {
        ast::BinaryOpSpec::Dominance(DominanceSpec {
            name: type_def[1..].to_string(),
            dist: RangeSpec::Bound { min_dist: 1, max_dist: 1 },
            edge_anno: Some(anno),
        })
    },
    // Dominance (without edge annotation)
    <type_def:r">([a-zA-Z_%][a-zA-Z0-9_\-%]*)?"> <range:(RangeSpec)?> => {
        ast::BinaryOpSpec::Dominance(DominanceSpec {
            name: type_def[1..].to_string(),
            dist: range.unwrap_or(RangeSpec::Bound { min_dist: 1, max_dist: 1 }),
            edge_anno: None,
        })
    },
    // Pointing (direct with edge annotation), the mandatory name follows the `->`
    <type_def:r"->[a-zA-Z_%][a-zA-Z0-9_\-%]*"> <anno:EdgeAnno> => {
        ast::BinaryOpSpec::Pointing(PointingSpec {
            name: type_def[2..].to_string(),
            dist: RangeSpec::Bound { min_dist: 1, max_dist: 1 },
            edge_anno: Some(anno),
        })
    },
    // Pointing (without edge annotation)
    <type_def:r"->[a-zA-Z_%][a-zA-Z0-9_\-%]*"> <range:(RangeSpec)?> => {
        ast::BinaryOpSpec::Pointing(PointingSpec {
            name: type_def[2..].to_string(),
            dist: range.unwrap_or(RangeSpec::Bound { min_dist: 1, max_dist: 1 }),
            edge_anno: None,
        })
    },
    // Precedence, an optional segmentation name follows the `.`
    <prec_def:r"\.([a-zA-Z_%][a-zA-Z0-9_\-%]*)?"> <range:(RangeSpec)?> => {
        ast::BinaryOpSpec::Precedence(PrecedenceSpec {
            segmentation: Some(prec_def[1..].to_string()).filter(|seg| !seg.is_empty()),
            dist: range.unwrap_or(RangeSpec::Bound { min_dist: 1, max_dist: 1 }),
        })
    },
    // Near, an optional segmentation name follows the `^`
    <near_def:r"\^([a-zA-Z_%][a-zA-Z0-9_\-%]*)?"> <range:(RangeSpec)?> => {
        ast::BinaryOpSpec::Near(NearSpec {
            segmentation: Some(near_def[1..].to_string()).filter(|seg| !seg.is_empty()),
            dist: range.unwrap_or(RangeSpec::Bound { min_dist: 1, max_dist: 1 }),
        })
    },
    // Part of subcorpus
    "@" <range:(RangeSpec)?> => {
        ast::BinaryOpSpec::PartOfSubCorpus(PartOfSubCorpusSpec {
            dist: range.unwrap_or(RangeSpec::Bound { min_dist: 1, max_dist: 1 }),
        })
    },
    // Overlap
    OVERLAP => {
        ast::BinaryOpSpec::Overlap(OverlapSpec { reflexive: false })
    },
    // Identical coverage
    IDENT_COV => {
        ast::BinaryOpSpec::IdenticalCoverage(IdenticalCoverageSpec {})
    },
    // Inclusion
    INCLUSION => {
        ast::BinaryOpSpec::Inclusion(InclusionSpec {})
    },
    // Left alignment
    LEFT_ALIGNED => {
        ast::BinaryOpSpec::LeftAlignment(LeftAlignmentSpec {})
    },
    // Right alignment
    RIGHT_ALIGNED => {
        ast::BinaryOpSpec::RightAlignment(RightAlignmentSpec {})
    },
    // Identical node
    IDENT_NODE => {
        ast::BinaryOpSpec::IdenticalNode(IdenticalNodeSpec {})
    },
    // TODO: add more binary operators
}
/// Filters which can be only applied between node references.
/// Both are represented as value comparison with the matching comparison operator.
FilterOpSpec : ast::BinaryOpSpec = {
    "==" => {
        let cmp = ast::ComparisonOperator::Equal;
        ast::BinaryOpSpec::ValueComparison(cmp)
    },
    "!=" => {
        let cmp = ast::ComparisonOperator::NotEqual;
        ast::BinaryOpSpec::ValueComparison(cmp)
    },
}
/// Unary operators only have one operand
UnaryOpSpec : ast::UnaryOpSpec = {
    // Arity: `:arity=` followed by a range that is stored as `children`
    ":arity" "=" <children:RangeSpec> => ast::UnaryOpSpec::Arity(AritySpec { children }),
    // TODO: add more unary operators
}
/// Comparison of an annotation with a value: `=` for equality, `!=` for inequality.
ComparisonOperator: ast::ComparisonOperator = {
    "=" => {
        ast::ComparisonOperator::Equal
    },
    "!=" => {
        ast::ComparisonOperator::NotEqual
    },
}
/// Value to search for, either an exact text in double quotes or a regular
/// expression between slashes. The delimiters are not part of the value.
TextSearch: ast::TextSearch = {
    <quoted:r#""[^"]*""#> => {
        let inner = &quoted[1..quoted.len() - 1];
        ast::TextSearch(inner.to_string(), ast::StringMatchType::Exact)
    },
    // see https://stackoverflow.com/questions/37032620/regex-for-matching-a-string-literal-in-java
    // for an example how to match escaped quotation characters
    <pattern:r#"/[^/\\]*(\\.[^/\\]*)*/"#> => {
        let inner = &pattern[1..pattern.len() - 1];
        ast::TextSearch(inner.to_string(), ast::StringMatchType::Regex)
    },
};
/// Edge annotation condition in square brackets, e.g. `[func="OA"]`
EdgeAnno: EdgeAnnoSearchSpec = {
    "[" <qname:QName> <cmp:ComparisonOperator> <text:TextSearch> "]" => {
        // TODO: multiple edge annotations
        let ast::QName(ns, name) = qname;
        let ast::TextSearch(val, match_type) = text;
        match (match_type, cmp) {
            (ast::StringMatchType::Exact, ast::ComparisonOperator::Equal) => {
                EdgeAnnoSearchSpec::ExactValue { ns, name, val: Some(val) }
            }
            (ast::StringMatchType::Exact, ast::ComparisonOperator::NotEqual) => {
                EdgeAnnoSearchSpec::NotExactValue { ns, name, val }
            }
            (ast::StringMatchType::Regex, ast::ComparisonOperator::Equal) => {
                EdgeAnnoSearchSpec::RegexValue { ns, name, val }
            }
            (ast::StringMatchType::Regex, ast::ComparisonOperator::NotEqual) => {
                EdgeAnnoSearchSpec::NotRegexValue { ns, name, val }
            }
        }
    },
}
/// Distance specification of an operator: `min,max`, a single exact distance
/// or `*` for an unbound distance. A leading comma is accepted and ignored.
///
/// The DIGITS token only guarantees decimal digits, so a number can still be
/// too large for the distance type. Such a number is reported as a user parse
/// error: silently replacing it with 0 would execute a different query than the
/// one that was written. This grammar declares no custom error type, so the
/// user error is a static string.
RangeSpec: ast::RangeSpec = {
    (",")? <min:DIGITS> "," <max:DIGITS> =>? {
        match (min.parse(), max.parse()) {
            (Ok(min_dist), Ok(max_dist)) => Ok(ast::RangeSpec::Bound { min_dist, max_dist }),
            _ => Err(::lalrpop_util::ParseError::User {
                error: "distance in range specification is too large",
            }),
        }
    },
    // a single number is used as minimum and maximum distance
    (",")? <exact:DIGITS> =>? {
        match exact.parse() {
            Ok(dist) => Ok(ast::RangeSpec::Bound { min_dist: dist, max_dist: dist }),
            Err(_) => Err(::lalrpop_util::ParseError::User {
                error: "distance in range specification is too large",
            }),
        }
    },
    "*" => ast::RangeSpec::Unbound,
}
/// Annotation name with an optional namespace, written as `ns:name` or just `name`.
QName: ast::QName = {
    <ns:ID> ":" <name:ID> => ast::QName(Some(ns.to_string()), name.to_string()),
    <name:ID> => ast::QName(None, name.to_string()),
};