use self::nodesearch::NodeSearch;
use crate::{
annis::operator::{BinaryOperatorBase, EstimationType},
errors::Result,
graph::Match,
};
use graphannis_core::annostorage::NodeAnnotationStorage;
use graphannis_core::{annostorage::MatchGroup, types::AnnoKey};
use std::collections::BTreeMap;
use std::sync::Arc;
#[derive(Debug, Clone)]
pub struct CostEstimate {
pub output: usize,
pub intermediate_sum: usize,
pub processed_in_step: usize,
}
#[derive(Debug, Clone)]
pub struct ExecutionNodeDesc {
pub component_nr: usize,
pub lhs: Option<Box<ExecutionNodeDesc>>,
pub rhs: Option<Box<ExecutionNodeDesc>>,
pub node_pos: BTreeMap<usize, usize>,
pub impl_description: String,
pub query_fragment: String,
pub cost: Option<CostEstimate>,
}
fn calculate_outputsize<Op: BinaryOperatorBase + ?Sized>(
op: &Op,
cost_lhs: &CostEstimate,
cost_rhs: &CostEstimate,
) -> Result<usize> {
let output = match op.estimation_type()? {
EstimationType::Selectivity(selectivity) => {
let num_tuples = (cost_lhs.output * cost_rhs.output) as f64;
if let Some(edge_sel) = op.edge_anno_selectivity()? {
(num_tuples * selectivity * edge_sel).round() as usize
} else {
(num_tuples * selectivity).round() as usize
}
}
EstimationType::Min => std::cmp::min(cost_lhs.output, cost_rhs.output),
};
Ok(std::cmp::max(output, 1))
}
impl ExecutionNodeDesc {
pub fn empty_with_fragment(
node_nr: usize,
query_fragment: String,
estimated_output: usize,
) -> ExecutionNodeDesc {
let mut node_pos = BTreeMap::new();
node_pos.insert(node_nr, 0);
let cost = Some(CostEstimate {
output: estimated_output,
intermediate_sum: 0,
processed_in_step: 0,
});
ExecutionNodeDesc {
component_nr: 0,
lhs: None,
rhs: None,
node_pos,
impl_description: String::from(""),
query_fragment,
cost,
}
}
pub fn join<Op: BinaryOperatorBase + ?Sized>(
op: &Op,
lhs: Option<&ExecutionNodeDesc>,
rhs: Option<&ExecutionNodeDesc>,
impl_description: &str,
query_fragment: &str,
processed_func: &dyn Fn(EstimationType, usize, usize) -> usize,
) -> Result<ExecutionNodeDesc> {
let component_nr = if let Some(d) = lhs {
d.component_nr
} else if let Some(d) = rhs {
d.component_nr
} else {
0
};
let mut node_pos = BTreeMap::new();
let offset = if let Some(lhs) = lhs {
node_pos = lhs.node_pos.clone();
node_pos.len()
} else {
0
};
if let Some(rhs) = rhs {
for e in &rhs.node_pos {
node_pos.insert(*e.0, e.1 + offset);
}
}
let cost = if let (Some(lhs), Some(rhs)) = (lhs, rhs) {
if let (Some(cost_lhs), Some(cost_rhs)) = (&lhs.cost, &rhs.cost) {
let output = calculate_outputsize(op, cost_lhs, cost_rhs)?;
let processed_in_step =
processed_func(op.estimation_type()?, cost_lhs.output, cost_rhs.output);
Some(CostEstimate {
output,
intermediate_sum: processed_in_step
+ cost_lhs.intermediate_sum
+ cost_rhs.intermediate_sum,
processed_in_step,
})
} else {
None
}
} else {
None
};
Ok(ExecutionNodeDesc {
component_nr,
lhs: lhs.map(|x| Box::new(x.clone())),
rhs: rhs.map(|x| Box::new(x.clone())),
node_pos,
impl_description: String::from(impl_description),
query_fragment: String::from(query_fragment),
cost,
})
}
pub fn debug_string(&self, indention: &str) -> String {
let mut result = String::from(indention);
let cost_str = if let Some(ref cost) = self.cost {
format!(
"out: {}, sum: {}, instep: {}",
cost.output, cost.intermediate_sum, cost.processed_in_step
)
} else {
String::from("no cost estimated")
};
if self.lhs.is_none() && self.rhs.is_none() {
let node_nr = self.node_pos.keys().next().cloned().unwrap_or(0) + 1;
result.push_str(&format!(
"#{} ({}) [{}] {}\n",
&node_nr.to_string(),
&self.query_fragment,
&cost_str,
&self.impl_description,
));
} else {
result.push_str(&format!(
"+|{} ({}) [{}]\n",
&self.impl_description, &self.query_fragment, &cost_str
));
let new_indention = format!("{} ", indention);
if let Some(ref lhs) = self.lhs {
result.push_str(&lhs.debug_string(&new_indention));
}
if let Some(ref rhs) = self.rhs {
result.push_str(&rhs.debug_string(&new_indention));
}
}
result
}
}
pub type MatchValueFilterFunc =
Box<dyn Fn(&Match, &dyn NodeAnnotationStorage) -> Result<bool> + Send + Sync>;
pub struct NodeSearchDesc {
pub qname: (Option<String>, Option<String>),
pub cond: Vec<MatchValueFilterFunc>,
pub const_output: Option<Arc<AnnoKey>>,
}
pub trait ExecutionNode: Iterator {
fn as_nodesearch(&self) -> Option<&NodeSearch<'_>> {
None
}
fn get_desc(&self) -> Option<&ExecutionNodeDesc> {
None
}
fn is_sorted_by_text(&self) -> bool {
false
}
}
pub struct EmptyResultSet;
impl Iterator for EmptyResultSet {
type Item = Result<MatchGroup>;
fn next(&mut self) -> Option<Result<MatchGroup>> {
None
}
}
impl ExecutionNode for EmptyResultSet {
fn as_nodesearch(&self) -> Option<&NodeSearch<'_>> {
None
}
fn get_desc(&self) -> Option<&ExecutionNodeDesc> {
None
}
}
pub mod filter;
pub mod indexjoin;
pub mod nestedloop;
pub mod nodesearch;
pub mod parallel;
pub mod tokensearch;