1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
//! `MATCH p = shortestPath(...)` executor.
//!
//! Extracted from `executor/mod.rs` in 0.9.53 — see `test_mod_rs_purity`.
//! The Phase A.3 "binding fix" added enough lines to push mod.rs past
//! the 1300-line shim cap; the natural home for the function is its
//! own module since it represents a distinct execution shape (BFS
//! between two anchor points rather than the usual pattern-walk).
use super::*;
use crate::graph::core::pattern_matching::PatternExecutor;
use petgraph::graph::NodeIndex;
impl<'a> CypherExecutor<'a> {
/// Execute a shortestPath MATCH: find shortest path between anchored endpoints
pub(super) fn execute_shortest_path_match(
&self,
clause: &MatchClause,
path_assignment: &PathAssignment,
existing: ResultSet,
) -> Result<ResultSet, String> {
let pattern = clause
.patterns
.get(path_assignment.pattern_index)
.ok_or("Invalid pattern index for shortestPath")?;
// Extract source and target node patterns from the pattern
let elements = &pattern.elements;
if elements.len() < 3 {
return Err("shortestPath requires a pattern like (a)-[:REL*..N]->(b)".to_string());
}
let source_pattern = match &elements[0] {
PatternElement::Node(np) => np,
_ => return Err("shortestPath pattern must start with a node".to_string()),
};
let target_pattern = match elements.last() {
Some(PatternElement::Node(np)) => np,
_ => return Err("shortestPath pattern must end with a node".to_string()),
};
// Extract edge direction and connection type from the pattern
let (edge_direction, edge_connection_type) = elements
.iter()
.find_map(|elem| {
if let PatternElement::Edge(ep) = elem {
Some((ep.direction, ep.connection_type.clone()))
} else {
None
}
})
.unwrap_or((EdgeDirection::Both, None));
let connection_types_vec: Option<Vec<String>> = edge_connection_type.map(|ct| vec![ct]);
let connection_types: Option<&[String]> = connection_types_vec.as_deref();
// Build the (source_idx, target_idx, prior_row) work-list.
//
// Phase A.3 / 0.9.53 fix: when the source or target variable is
// already bound from a prior MATCH clause (the canonical Neo4j
// pattern is `MATCH (a {id: X}), (b {id: Y}) MATCH p =
// shortestPath((a)-[*]-(b))`), we MUST use the bound NodeIndex
// and skip re-resolution. Pre-fix, this branch called
// `find_matching_nodes_pub` on the bare-variable patterns,
// which (correctly) returned ALL nodes in the graph — turning a
// single BFS into a 500K × 500K cartesian-product runaway on
// realistic agent workloads.
//
// Fast path: every input row contributes exactly one (src, tgt)
// pair (or zero, if a variable isn't bound and the pattern is
// bare). Slow path (no prior rows): fall back to full pattern
// resolution + cartesian product, matching pre-fix behaviour
// for bare-pattern shortestPath callers.
let src_var = source_pattern.variable.as_deref();
let tgt_var = target_pattern.variable.as_deref();
let pairs: Vec<(NodeIndex, NodeIndex, Option<&ResultRow>)> = if !existing.rows.is_empty()
&& src_var
.map(|v| existing.rows[0].node_bindings.contains_key(v))
.unwrap_or(false)
&& tgt_var
.map(|v| existing.rows[0].node_bindings.contains_key(v))
.unwrap_or(false)
{
// Fast path — both endpoints pre-bound. One pair per input row.
let mut out = Vec::with_capacity(existing.rows.len());
for row in &existing.rows {
let src = match src_var.and_then(|v| row.node_bindings.get(v)) {
Some(&idx) => idx,
None => continue,
};
let tgt = match tgt_var.and_then(|v| row.node_bindings.get(v)) {
Some(&idx) => idx,
None => continue,
};
out.push((src, tgt, Some(row)));
}
out
} else {
// Slow path — no prior bindings; resolve patterns + cartesian product.
let executor =
PatternExecutor::new_lightweight_with_params(self.graph, None, self.params)
.set_deadline(self.deadline);
let source_nodes = executor.find_matching_nodes_pub(source_pattern)?;
let target_nodes = executor.find_matching_nodes_pub(target_pattern)?;
let mut out = Vec::with_capacity(source_nodes.len() * target_nodes.len());
for &s in &source_nodes {
for &t in &target_nodes {
out.push((s, t, None));
}
}
out
};
let mut all_rows = Vec::new();
for (source_idx, target_idx, prior_row) in pairs {
{
if source_idx == target_idx {
continue;
}
// Dispatch based on edge direction in the pattern
let path_result = match edge_direction {
EdgeDirection::Both => {
// Undirected BFS — same behavior as fluent API shortest_path()
crate::graph::algorithms::graph_algorithms::shortest_path(
self.graph,
source_idx,
target_idx,
connection_types,
None,
self.deadline,
)
}
EdgeDirection::Outgoing => {
// Directed BFS — only follow outgoing edges
crate::graph::algorithms::graph_algorithms::shortest_path_directed(
self.graph,
source_idx,
target_idx,
connection_types,
None,
self.deadline,
)
}
EdgeDirection::Incoming => {
// Reverse source/target and follow outgoing, then reverse path
crate::graph::algorithms::graph_algorithms::shortest_path_directed(
self.graph,
target_idx,
source_idx,
connection_types,
None,
self.deadline,
)
.map(|mut pr| {
pr.path.reverse();
pr
})
}
};
if let Some(path_result) = path_result {
// Start from the prior row's bindings (if any) so
// downstream RETURN can see fields the prior MATCH
// exposed (e.g. `RETURN start.foo`).
let mut row = match prior_row {
Some(pr) => pr.clone(),
None => ResultRow::new(),
};
// Bind source variable
if let Some(ref var) = source_pattern.variable {
row.node_bindings.insert(var.clone(), source_idx);
}
// Bind target variable
if let Some(ref var) = target_pattern.variable {
row.node_bindings.insert(var.clone(), target_idx);
}
// Build path with connection types.
// Format: [(node, conn_type_leading_to_node), ...] — excludes source.
// Source is stored separately in PathBinding.source.
let connections =
crate::graph::algorithms::graph_algorithms::get_path_connections(
self.graph,
&path_result.path,
);
let path_nodes: Vec<(NodeIndex, String)> = path_result
.path
.iter()
.skip(1) // Skip source — it's in PathBinding.source
.enumerate()
.map(|(i, &idx)| {
let conn_type = if i < connections.len() {
connections[i].clone().unwrap_or_default()
} else {
String::new()
};
(idx, conn_type)
})
.collect();
// Store path binding
row.path_bindings.insert(
path_assignment.variable.clone(),
PathBinding {
source: source_idx,
hops: path_result.cost,
path: path_nodes,
},
);
all_rows.push(row);
}
}
}
Ok(ResultSet {
rows: all_rows,
columns: existing.columns,
lazy_return_items: None,
})
}
}