use super::super::ast::*;
use crate::graph::core::pattern_matching::{PatternElement, PropertyMatcher};
use crate::graph::schema::DirGraph;
use crate::graph::storage::GraphRead;
use std::collections::{HashMap, HashSet};
pub(super) fn optimize_pattern_start_node(query: &mut CypherQuery, graph: &DirGraph) {
use crate::graph::core::pattern_matching::EdgeDirection;
let mut bound_vars: HashSet<String> = HashSet::new();
for clause in &mut query.clauses {
let (patterns, path_assignments) = match clause {
Clause::Match(m) => (&mut m.patterns, &m.path_assignments),
Clause::OptionalMatch(m) => (&mut m.patterns, &m.path_assignments),
_ => continue,
};
for (pi, pattern) in patterns.iter_mut().enumerate() {
if pattern.elements.len() < 3 {
continue;
}
if path_assignments.iter().any(|pa| pa.pattern_index == pi) {
continue;
}
let first_node = match &pattern.elements[0] {
PatternElement::Node(np) => np,
_ => continue,
};
let last_node = match pattern.elements.last() {
Some(PatternElement::Node(np)) => np,
_ => continue,
};
let first_sel = estimate_node_selectivity_in_context(first_node, graph, &bound_vars);
let last_sel = estimate_node_selectivity_in_context(last_node, graph, &bound_vars);
if last_sel.saturating_mul(5) >= first_sel {
continue;
}
pattern.elements.reverse();
for elem in &mut pattern.elements {
if let PatternElement::Edge(ep) = elem {
ep.direction = match ep.direction {
EdgeDirection::Outgoing => EdgeDirection::Incoming,
EdgeDirection::Incoming => EdgeDirection::Outgoing,
EdgeDirection::Both => EdgeDirection::Both,
};
}
}
}
for pattern in patterns.iter() {
for elem in &pattern.elements {
if let PatternElement::Node(np) = elem {
if let Some(ref v) = np.variable {
bound_vars.insert(v.clone());
}
}
}
}
}
}
fn estimate_node_selectivity_in_context(
np: &crate::graph::core::pattern_matching::NodePattern,
graph: &DirGraph,
bound_vars: &HashSet<String>,
) -> usize {
if let Some(ref v) = np.variable {
if bound_vars.contains(v) {
return 1;
}
}
estimate_node_selectivity(np, graph)
}
pub(super) fn estimate_node_selectivity(
np: &crate::graph::core::pattern_matching::NodePattern,
graph: &DirGraph,
) -> usize {
let type_count = np
.node_type
.as_ref()
.and_then(|t| graph.type_indices.get(t))
.map(|idx| idx.len())
.unwrap_or(GraphRead::node_count(&graph.graph));
let unconstrained = np.node_type.is_none();
match &np.properties {
None if unconstrained => usize::MAX,
None => type_count.max(1),
Some(props) if props.is_empty() && unconstrained => usize::MAX,
Some(props) if props.is_empty() => type_count.max(1),
Some(props) => {
for (prop, matcher) in props {
if prop == "id" {
match matcher {
PropertyMatcher::Equals(_) | PropertyMatcher::EqualsParam(_) => return 1,
PropertyMatcher::In(vals) => return vals.len(),
_ => {}
}
}
}
if let Some(ref nt) = np.node_type {
for (prop, matcher) in props {
match matcher {
PropertyMatcher::Equals(val) => {
let key = (nt.clone(), prop.clone());
if graph.property_indices.contains_key(&key) {
if let Some(results) = graph.lookup_by_index(nt, prop, val) {
return results.len().max(1);
}
return 1;
}
}
PropertyMatcher::In(vals) => return vals.len(),
_ => {}
}
}
}
let eq_count = props
.values()
.filter(|m| {
matches!(
m,
PropertyMatcher::Equals(_) | PropertyMatcher::EqualsParam(_)
)
})
.count();
let other_count = props.len() - eq_count;
let mut est = type_count;
for _ in 0..eq_count {
est /= 100;
}
for _ in 0..other_count {
est /= 10;
}
est.max(1)
}
}
}
pub(super) fn reorder_match_clauses(query: &mut CypherQuery, graph: &DirGraph) {
if !graph.has_edge_type_counts_cache() {
return;
}
let edge_counts = graph.get_edge_type_counts();
let triple_counts: Option<HashMap<(String, String, String), usize>> =
if graph.has_type_connectivity_cache() {
graph.get_type_connectivity().map(|triples| {
triples
.into_iter()
.map(|t| ((t.src, t.conn, t.tgt), t.count))
.collect()
})
} else {
None
};
let mut i = 0;
while i < query.clauses.len() {
let mut j = i;
while j < query.clauses.len() {
match &query.clauses[j] {
Clause::Match(m) if m.path_assignments.is_empty() => j += 1,
_ => break,
}
}
if j - i < 2 {
i = j.max(i + 1);
continue;
}
let mut costs: Vec<usize> = Vec::with_capacity(j - i);
let mut all_scored = true;
for k in i..j {
let m = match &query.clauses[k] {
Clause::Match(m) => m,
_ => unreachable!(),
};
match estimate_match_edge_cost(m, &edge_counts, triple_counts.as_ref()) {
Some(c) => costs.push(c),
None => {
all_scored = false;
break;
}
}
}
if !all_scored {
i = j;
continue;
}
if !shares_variable_across(&query.clauses[i..j]) {
i = j;
continue;
}
let mut order: Vec<(usize, usize)> = costs.iter().copied().enumerate().collect();
order.sort_by_key(|&(orig, c)| (c, orig));
let already_sorted = order
.iter()
.enumerate()
.all(|(pos, &(orig, _))| pos == orig);
if !already_sorted {
let extracted: Vec<Clause> = query.clauses.drain(i..j).collect();
for (offset, &(orig, _)) in order.iter().enumerate() {
query.clauses.insert(i + offset, extracted[orig].clone());
}
}
i = j;
}
}
fn estimate_match_edge_cost(
m: &MatchClause,
edge_counts: &HashMap<String, usize>,
triple_counts: Option<&HashMap<(String, String, String), usize>>,
) -> Option<usize> {
let mut total: usize = 0;
for pattern in &m.patterns {
if pattern.elements.len() < 3 {
return None;
}
let first = &pattern.elements[0];
let last = pattern.elements.last().unwrap();
if !is_id_anchored(first) && !is_id_anchored(last) {
return None;
}
let elems = &pattern.elements;
for idx in 0..elems.len() {
let ep = match &elems[idx] {
PatternElement::Edge(ep) => ep,
_ => continue,
};
let ct = ep.connection_type.as_ref()?;
let mut count: Option<usize> = None;
if let Some(triples) = triple_counts {
let src_label = idx
.checked_sub(1)
.and_then(|i| elems.get(i))
.and_then(node_label);
let tgt_label = elems.get(idx + 1).and_then(node_label);
if let (Some(sl), Some(tl)) = (src_label, tgt_label) {
let key_fwd = (sl.clone(), ct.clone(), tl.clone());
let key_rev = (tl, ct.clone(), sl);
let fwd = triples.get(&key_fwd).copied().unwrap_or(0);
let rev = triples.get(&key_rev).copied().unwrap_or(0);
if fwd > 0 || rev > 0 {
count = Some(fwd + rev);
}
}
}
let resolved = match count {
Some(c) => c,
None => *edge_counts.get(ct)?,
};
total = total.saturating_add(resolved);
}
}
Some(total)
}
fn node_label(elem: &PatternElement) -> Option<String> {
let np = match elem {
PatternElement::Node(np) => np,
_ => return None,
};
np.node_type.clone()
}
fn is_id_anchored(elem: &PatternElement) -> bool {
let np = match elem {
PatternElement::Node(np) => np,
_ => return false,
};
let props = match &np.properties {
Some(p) => p,
None => return false,
};
props.iter().any(|(prop, matcher)| {
prop == "id"
&& matches!(
matcher,
PropertyMatcher::Equals(_) | PropertyMatcher::EqualsParam(_)
)
})
}
fn shares_variable_across(clauses: &[Clause]) -> bool {
let mut common: Option<HashSet<String>> = None;
for clause in clauses {
let m = match clause {
Clause::Match(m) => m,
_ => return false,
};
let vars: HashSet<String> = m
.patterns
.iter()
.flat_map(|p| p.elements.iter())
.filter_map(|e| match e {
PatternElement::Node(np) => np.variable.clone(),
_ => None,
})
.collect();
common = Some(match common {
None => vars,
Some(prev) => prev.intersection(&vars).cloned().collect(),
});
}
common.is_some_and(|s| !s.is_empty())
}
pub(super) fn reorder_match_patterns(query: &mut CypherQuery, graph: &DirGraph) {
let mut bound_vars: HashSet<String> = HashSet::new();
for clause in &mut query.clauses {
let mc = match clause {
Clause::Match(mc) => mc,
Clause::OptionalMatch(mc) => {
for pat in mc.patterns.iter() {
for elem in &pat.elements {
if let PatternElement::Node(np) = elem {
if let Some(ref v) = np.variable {
bound_vars.insert(v.clone());
}
}
}
}
continue;
}
_ => continue,
};
if mc.patterns.len() < 2 || !mc.path_assignments.is_empty() {
for pat in mc.patterns.iter() {
for elem in &pat.elements {
if let PatternElement::Node(np) = elem {
if let Some(ref v) = np.variable {
bound_vars.insert(v.clone());
}
}
}
}
continue;
}
let mut pattern_scores: Vec<(usize, usize)> = mc
.patterns
.iter()
.enumerate()
.map(|(i, pat)| {
let sel = if let Some(PatternElement::Node(np)) = pat.elements.first() {
estimate_node_selectivity_in_context(np, graph, &bound_vars)
} else {
usize::MAX
};
(i, sel)
})
.collect();
pattern_scores.sort_by_key(|&(_, sel)| sel);
let already_ordered = pattern_scores
.iter()
.enumerate()
.all(|(pos, &(idx, _))| pos == idx);
if !already_ordered {
let old_patterns = std::mem::take(&mut mc.patterns);
mc.patterns = pattern_scores
.iter()
.map(|&(idx, _)| old_patterns[idx].clone())
.collect();
}
for pat in mc.patterns.iter() {
for elem in &pat.elements {
if let PatternElement::Node(np) = elem {
if let Some(ref v) = np.variable {
bound_vars.insert(v.clone());
}
}
}
}
}
}