graphannis/annis/db/
plan.rs

1use crate::annis::db::aql::disjunction::Disjunction;
2use crate::annis::db::aql::Config;
3use crate::annis::db::exec::{EmptyResultSet, ExecutionNode, ExecutionNodeDesc};
4use crate::annis::errors::*;
5use crate::annis::util::TimeoutCheck;
6use crate::AnnotationGraph;
7use graphannis_core::annostorage::match_group_with_symbol_ids;
8use graphannis_core::annostorage::symboltable::SymbolTable;
9use graphannis_core::{
10    annostorage::MatchGroup,
11    types::{AnnoKey, NodeID},
12};
13use std::collections::HashMap;
14use std::fmt::Formatter;
15use transient_btree_index::{BtreeConfig, BtreeIndex};
16
17pub struct ExecutionPlan<'a> {
18    plans: Vec<Box<dyn ExecutionNode<Item = Result<MatchGroup>> + 'a>>,
19    current_plan: usize,
20    descriptions: Vec<Option<ExecutionNodeDesc>>,
21    inverse_node_pos: Vec<Option<Vec<usize>>>,
22    proxy_mode: bool,
23    unique_result_set: BtreeIndex<Vec<(NodeID, usize)>, bool>,
24    anno_key_symbols: SymbolTable<AnnoKey>,
25}
26
27impl<'a> ExecutionPlan<'a> {
28    pub fn from_disjunction(
29        query: &'a Disjunction,
30        db: &'a AnnotationGraph,
31        config: &Config,
32        timeout: TimeoutCheck,
33    ) -> Result<ExecutionPlan<'a>> {
34        let mut plans: Vec<Box<dyn ExecutionNode<Item = Result<MatchGroup>> + 'a>> = Vec::new();
35        let mut descriptions = Vec::new();
36        let mut inverse_node_pos = Vec::new();
37        for alt in &query.alternatives {
38            let p = alt.make_exec_node(db, config, timeout);
39            if let Ok(p) = p {
40                descriptions.push(p.get_desc().cloned());
41
42                if let Some(desc) = p.get_desc() {
43                    // check if node position mapping is actually needed
44                    let node_pos_needed = desc
45                        .node_pos
46                        .iter()
47                        .any(|(target_pos, stream_pos)| target_pos != stream_pos);
48                    if node_pos_needed {
49                        // invert the node position mapping
50                        let new_mapping_map: HashMap<usize, usize> = desc
51                            .node_pos
52                            .iter()
53                            .map(|(target_pos, stream_pos)| (*stream_pos, *target_pos))
54                            .collect();
55                        let mut new_mapping: Vec<usize> = Vec::with_capacity(new_mapping_map.len());
56                        for i in 0..new_mapping_map.len() {
57                            let mapping_value = new_mapping_map.get(&i).unwrap_or(&i);
58                            new_mapping.push(*mapping_value);
59                        }
60                        inverse_node_pos.push(Some(new_mapping));
61                    } else {
62                        inverse_node_pos.push(None);
63                    }
64                } else {
65                    inverse_node_pos.push(None);
66                }
67
68                plans.push(p);
69            } else if let Err(e) = p {
70                if let GraphAnnisError::AQLSemanticError(_) = &e {
71                    return Err(e);
72                }
73            }
74        }
75
76        if plans.is_empty() {
77            // add a dummy execution step that yields no results
78            let no_results_exec = EmptyResultSet {};
79            plans.push(Box::new(no_results_exec));
80            descriptions.push(None);
81        }
82        let btree_config = BtreeConfig::default().fixed_value_size(std::mem::size_of::<bool>());
83        Ok(ExecutionPlan {
84            current_plan: 0,
85            descriptions,
86            inverse_node_pos,
87            proxy_mode: plans.len() == 1,
88            plans,
89            unique_result_set: BtreeIndex::with_capacity(btree_config, 10_000)?,
90            anno_key_symbols: SymbolTable::new(),
91        })
92    }
93
94    /// Re-orders the match vector from the top execution node to match the
95    /// requested query node order. If query nodes are not part of the result,
96    /// they are still included in the vector but you can not use the node ID at
97    /// this position.
98    fn reorder_match(&self, tmp: MatchGroup) -> MatchGroup {
99        if let Some(ref inverse_node_pos) = self.inverse_node_pos[self.current_plan] {
100            // re-order the matched nodes by the original node position of the query
101            let mut result = MatchGroup::new();
102            // We cannot assume that every node has a mapping, so use the maximum index
103            // in the mapping and not the size of the mapping vector as output vector size.
104            let output_size = if let Some(max_item) = inverse_node_pos.iter().max() {
105                *max_item + 1
106            } else {
107                0
108            };
109            result.resize_with(output_size, Default::default);
110            for (stream_pos, m) in tmp.into_iter().enumerate() {
111                let target_pos = inverse_node_pos[stream_pos];
112                result[target_pos] = m;
113            }
114            result
115        } else {
116            tmp
117        }
118    }
119
120    pub fn estimated_output_size(&self) -> usize {
121        let mut estimation = 0;
122        for desc in self.descriptions.iter().flatten() {
123            if let Some(ref cost) = desc.cost {
124                estimation += cost.output;
125            }
126        }
127        estimation
128    }
129
130    pub fn is_sorted_by_text(&self) -> bool {
131        if self.plans.len() > 1 {
132            false
133        } else if self.plans.is_empty() {
134            true
135        } else {
136            self.plans[0].is_sorted_by_text()
137        }
138    }
139
140    fn insert_into_unique_result_set(&mut self, n: &MatchGroup) -> Result<bool> {
141        let key = match_group_with_symbol_ids(n, &mut self.anno_key_symbols)?;
142        if !self.unique_result_set.contains_key(&key)? {
143            self.unique_result_set.insert(key, true)?;
144            return Ok(true);
145        }
146        Ok(false)
147    }
148}
149
150impl std::fmt::Display for ExecutionPlan<'_> {
151    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
152        for (i, d) in self.descriptions.iter().enumerate() {
153            if i > 0 {
154                writeln!(f, "---[OR]---")?;
155            }
156            if let Some(ref d) = d {
157                write!(f, "{}", d.debug_string(""))?;
158            } else {
159                write!(f, "<no description>")?;
160            }
161        }
162        Ok(())
163    }
164}
165
166impl Iterator for ExecutionPlan<'_> {
167    type Item = Result<MatchGroup>;
168
169    fn next(&mut self) -> Option<Self::Item> {
170        if self.proxy_mode {
171            // just act as an proxy, but make sure the order is the same as requested in the query
172            self.plans[0]
173                .next()
174                .map(|n| n.map(|n| self.reorder_match(n)))
175        } else {
176            while self.current_plan < self.plans.len() {
177                if let Some(n) = self.plans[self.current_plan].next() {
178                    match n {
179                        Ok(n) => {
180                            let n = self.reorder_match(n);
181
182                            // check if we already outputted this result
183                            match self.insert_into_unique_result_set(&n) {
184                                Ok(new_result) => {
185                                    if new_result {
186                                        // new result found, break out of while-loop and return the result
187                                        return Some(Ok(n));
188                                    }
189                                }
190                                Err(e) => return Some(Err(e)),
191                            }
192                        }
193                        Err(e) => {
194                            return Some(Err(e));
195                        }
196                    }
197                } else {
198                    // proceed to next plan
199                    self.current_plan += 1;
200                }
201            }
202            None
203        }
204    }
205}