graphannis/annis/db/aql/operators/
inclusion.rs1use crate::annis::db::exec::CostEstimate;
2use crate::annis::db::token_helper;
3use crate::annis::db::token_helper::TokenHelper;
4use crate::annis::errors::GraphAnnisError;
5use crate::annis::operator::{BinaryOperator, BinaryOperatorIndex, EstimationType};
6use crate::{AnnotationGraph, try_as_boxed_iter};
7use crate::{
8 annis::operator::{BinaryOperatorBase, BinaryOperatorSpec},
9 errors::Result,
10 graph::{GraphStorage, Match},
11 model::AnnotationComponentType,
12};
13use graphannis_core::types::NodeID;
14use graphannis_core::{
15 graph::{ANNIS_NS, DEFAULT_ANNO_KEY},
16 types::Component,
17};
18use itertools::Itertools;
19
20use std::collections::HashSet;
21use std::sync::Arc;
22
23#[derive(Clone, Debug, PartialOrd, Ord, Hash, PartialEq, Eq)]
24pub struct InclusionSpec;
25
26pub struct Inclusion<'a> {
27 gs_order: Arc<dyn GraphStorage>,
28 tok_helper: TokenHelper<'a>,
29}
30
31lazy_static! {
32 static ref COMPONENT_ORDER: Component<AnnotationComponentType> = {
33 Component::new(
34 AnnotationComponentType::Ordering,
35 ANNIS_NS.into(),
36 "".into(),
37 )
38 };
39}
40
41impl BinaryOperatorSpec for InclusionSpec {
42 fn necessary_components(
43 &self,
44 db: &AnnotationGraph,
45 ) -> HashSet<Component<AnnotationComponentType>> {
46 let mut v = HashSet::default();
47 v.insert(COMPONENT_ORDER.clone());
48 v.extend(token_helper::necessary_components(db));
49 v
50 }
51
52 fn create_operator<'a>(
53 &self,
54 db: &'a AnnotationGraph,
55 _cost_estimate: Option<(&CostEstimate, &CostEstimate)>,
56 ) -> Result<BinaryOperator<'a>> {
57 let optional_op = Inclusion::new(db);
58 optional_op.map(|op| BinaryOperator::Index(Box::new(op)))
59 }
60
61 #[cfg(test)]
62 fn into_any(self: Arc<Self>) -> Arc<dyn std::any::Any> {
63 self
64 }
65
66 #[cfg(test)]
67 fn any_ref(&self) -> &dyn std::any::Any {
68 self
69 }
70}
71
72impl<'a> Inclusion<'a> {
73 pub fn new(db: &'a AnnotationGraph) -> Result<Inclusion<'a>> {
74 let gs_order = db.get_graphstorage(&COMPONENT_ORDER).ok_or_else(|| {
75 GraphAnnisError::ImpossibleSearch(
76 "Ordering component missing (needed for _i_ operator)".to_string(),
77 )
78 })?;
79
80 let tok_helper = TokenHelper::new(db)?;
81
82 Ok(Inclusion {
83 gs_order,
84 tok_helper,
85 })
86 }
87}
88
89impl std::fmt::Display for Inclusion<'_> {
90 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
91 write!(f, "_i_")
92 }
93}
94
95impl BinaryOperatorBase for Inclusion<'_> {
96 fn filter_match(&self, lhs: &Match, rhs: &Match) -> Result<bool> {
97 let left_right_lhs = self.tok_helper.left_right_token_for(lhs.node)?;
98 let left_right_rhs = self.tok_helper.left_right_token_for(rhs.node)?;
99 if let (Some(start_lhs), Some(end_lhs), Some(start_rhs), Some(end_rhs)) = (
100 left_right_lhs.0,
101 left_right_lhs.1,
102 left_right_rhs.0,
103 left_right_rhs.1,
104 ) {
105 if let Some(l) = self.gs_order.distance(start_lhs, end_lhs)? {
107 if self.gs_order.is_connected(start_lhs, start_rhs, 0, std::ops::Bound::Included(l))?
109 && self.gs_order.is_connected(end_rhs, end_lhs, 0, std::ops::Bound::Included(l))?
111 {
112 return Ok(true);
113 }
114 }
115 }
116
117 Ok(false)
118 }
119
120 fn is_reflexive(&self) -> bool {
121 false
122 }
123
124 fn estimation_type(&self) -> Result<EstimationType> {
125 if let (Some(stats_order), Some(stats_left)) = (
126 self.gs_order.get_statistics(),
127 self.tok_helper.get_gs_left_token().get_statistics(),
128 ) {
129 let mut sum_cov_nodes = 0;
130 let mut sum_included = 0;
131
132 let num_of_token = stats_order.nodes as f64;
133 for gs_cov in self.tok_helper.get_gs_coverage().iter() {
134 if let Some(stats_cov) = gs_cov.get_statistics() {
135 sum_cov_nodes += stats_cov.nodes;
136
137 let covered_token_per_node = stats_cov.fan_out_99_percentile;
138 let aligned_non_token =
139 covered_token_per_node * stats_left.inverse_fan_out_99_percentile;
140
141 sum_included += covered_token_per_node + aligned_non_token;
142 }
143 }
144 if sum_cov_nodes == 0 {
145 return Ok(EstimationType::Selectivity(1.0 / num_of_token));
147 } else {
148 return Ok(EstimationType::Selectivity(
149 (sum_included as f64) / (sum_cov_nodes as f64),
150 ));
151 }
152 }
153
154 Ok(EstimationType::Selectivity(0.1))
155 }
156}
157
158enum NodeType {
159 Token(NodeID),
160 Other(NodeID),
161}
162
163impl BinaryOperatorIndex for Inclusion<'_> {
164 fn retrieve_matches<'b>(&'b self, lhs: &Match) -> Box<dyn Iterator<Item = Result<Match>> + 'b> {
165 let left_right_token = try_as_boxed_iter!(self.tok_helper.left_right_token_for(lhs.node));
166 if let (Some(start_lhs), Some(end_lhs)) = left_right_token {
167 let l = try_as_boxed_iter!(self.gs_order.distance(start_lhs, end_lhs));
169 if let Some(l) = l {
170 let overlapped_token =
172 self.gs_order
173 .find_connected(start_lhs, 0, std::ops::Bound::Included(l));
174 let candidates = overlapped_token
176 .map_ok(move |t| {
177 let others = self
178 .tok_helper
179 .get_gs_left_token()
180 .get_ingoing_edges(t)
181 .map_ok(NodeType::Other);
182 std::iter::once(Ok(NodeType::Token(t))).chain(others)
184 })
185 .flatten_ok()
186 .map(|c| match c {
188 Ok(c) => match c {
189 Ok(c) => Ok(c),
190 Err(e) => Err(GraphAnnisError::from(e)),
191 },
192 Err(e) => Err(GraphAnnisError::from(e)),
193 });
194 let result = candidates
196 .map(move |n| {
197 let n = n?;
198 match n {
199 NodeType::Token(t) => Ok(Some(t)),
200 NodeType::Other(n) => {
201 let mut end_n =
203 self.tok_helper.get_gs_right_token().get_outgoing_edges(n);
204 if let Some(end_n) = end_n.next() {
205 let end_n = end_n?;
206 if self.gs_order.is_connected(
207 end_n,
208 end_lhs,
209 0,
210 std::ops::Bound::Included(l),
211 )? {
212 return Ok(Some(n));
215 }
216 }
217 Ok(None)
218 }
219 }
220 })
221 .filter_map_ok(|n| n)
223 .map_ok(|n| Match {
224 node: n,
225 anno_key: DEFAULT_ANNO_KEY.clone(),
226 });
227 return Box::new(result);
228 }
229 }
230
231 Box::new(std::iter::empty())
232 }
233
234 fn as_binary_operator(&self) -> &dyn BinaryOperatorBase {
235 self
236 }
237}