1use std::collections::HashSet;
22use std::str::FromStr;
23
24use khive_types::EdgeRelation;
25
26use crate::ast::{Condition, ConditionValue, GqlQuery, PatternElement};
27use crate::error::QueryError;
28
29pub const MAX_DEPTH: usize = 10;
31
32pub fn validate(query: &mut GqlQuery) -> Result<(), QueryError> {
37 let mut seen_node_vars: HashSet<&str> = HashSet::new();
42 let mut seen_edge_vars: HashSet<&str> = HashSet::new();
43 for element in &query.pattern.elements {
44 match element {
45 PatternElement::Node(node) => {
46 if let Some(var) = node.variable.as_deref() {
47 if !seen_node_vars.insert(var) {
48 return Err(QueryError::Unsupported(format!(
49 "repeated node variable '{var}' (cycle / self-reachability \
50 requires alias-equality predicates not yet implemented)"
51 )));
52 }
53 }
54 }
55 PatternElement::Edge(edge) => {
56 if let Some(var) = edge.variable.as_deref() {
57 if !seen_edge_vars.insert(var) {
58 return Err(QueryError::Unsupported(format!(
59 "repeated edge variable '{var}' not supported"
60 )));
61 }
62 }
63 }
64 }
65 }
66
67 for element in &mut query.pattern.elements {
68 match element {
69 PatternElement::Node(node) => {
70 if node.properties.contains_key("namespace") {
71 return Err(QueryError::Validation(
72 "namespace is set by CompileOptions, not query text".into(),
73 ));
74 }
75 }
76 PatternElement::Edge(edge) => {
77 for relation in edge.relations.iter_mut() {
78 let parsed = EdgeRelation::from_str(relation)
79 .map_err(|err| QueryError::Validation(err.to_string()))?;
80 *relation = parsed.as_str().to_string();
81 }
82 if edge.min_hops == 0 {
83 return Err(QueryError::Unsupported(
84 "zero-hop ranges (min_hops = 0) not yet supported; \
85 use a minimum of 1 hop"
86 .into(),
87 ));
88 }
89 if edge.min_hops > edge.max_hops {
92 return Err(QueryError::Validation(format!(
93 "invalid hop range: min {} > max {}",
94 edge.min_hops, edge.max_hops
95 )));
96 }
97 if edge.min_hops > MAX_DEPTH {
101 return Err(QueryError::Unsupported(format!(
102 "minimum hop count {} exceeds depth cap {}",
103 edge.min_hops, MAX_DEPTH
104 )));
105 }
106 if edge.max_hops > MAX_DEPTH {
109 edge.max_hops = MAX_DEPTH;
110 }
111 }
112 }
113 }
114
115 let mut var_kinds: std::collections::HashMap<&str, VarKind> = std::collections::HashMap::new();
120 for element in &query.pattern.elements {
121 match element {
122 PatternElement::Node(n) => {
123 if let Some(v) = n.variable.as_deref() {
124 var_kinds.insert(v, VarKind::Node);
125 }
126 }
127 PatternElement::Edge(e) => {
128 if let Some(v) = e.variable.as_deref() {
129 var_kinds.insert(v, VarKind::Edge);
130 }
131 }
132 }
133 }
134
135 for cond in query.where_clause.iter_mut() {
136 let is_edge = var_kinds
137 .get(cond.variable.as_str())
138 .copied()
139 .unwrap_or(VarKind::Node)
140 == VarKind::Edge;
141 validate_condition(cond, is_edge)?;
142 }
143
144 Ok(())
145}
146
147#[derive(Clone, Copy, PartialEq, Eq)]
148enum VarKind {
149 Node,
150 Edge,
151}
152
153fn validate_condition(cond: &mut Condition, is_edge: bool) -> Result<(), QueryError> {
154 match cond.property.as_str() {
155 "namespace" => Err(QueryError::Validation(
156 "namespace is set by CompileOptions, not query text".into(),
157 )),
158 "kind" if !is_edge => Ok(()),
159 "relation" if is_edge => {
160 if let ConditionValue::String(ref mut s) = cond.value {
161 let parsed = EdgeRelation::from_str(s)
162 .map_err(|err| QueryError::Validation(err.to_string()))?;
163 *s = parsed.as_str().to_string();
164 }
165 Ok(())
166 }
167 _ => Ok(()),
168 }
169}
170
171#[cfg(test)]
172mod tests {
173 use super::*;
174 use crate::parsers::gql;
175
176 #[test]
177 fn node_kind_passes_through_unchanged() {
178 let mut q = gql::parse("MATCH (a:paper)-[:introduced_by]->(b:concept) RETURN a").unwrap();
180 validate(&mut q).unwrap();
181 let kinds: Vec<_> = q
182 .pattern
183 .nodes()
184 .map(|n| n.kind.as_deref().unwrap_or(""))
185 .collect();
186 assert_eq!(kinds, vec!["paper", "concept"]);
187 }
188
189 #[test]
190 fn normalises_relation_case_and_hyphens() {
191 let mut q = gql::parse("MATCH (a)-[:Introduced_By]->(b) RETURN a").unwrap();
192 validate(&mut q).unwrap();
193 let rels: Vec<_> = q
194 .pattern
195 .edges()
196 .flat_map(|e| e.relations.iter().cloned())
197 .collect();
198 assert_eq!(rels, vec!["introduced_by".to_string()]);
199 }
200
201 #[test]
202 fn rejects_unknown_relation() {
203 let mut q = gql::parse("MATCH (a)-[:not_a_relation]->(b) RETURN a").unwrap();
204 let err = validate(&mut q).unwrap_err();
205 let msg = err.to_string();
206 assert!(msg.contains("not_a_relation"), "msg: {msg}");
207 }
208
209 #[test]
210 fn unknown_kind_passes_through() {
211 let mut q = gql::parse("MATCH (a:gizmo)-[:extends]->(b) RETURN a").unwrap();
213 validate(&mut q).unwrap();
214 }
215
216 #[test]
217 fn clamps_depth_above_max() {
218 let mut q = gql::parse("MATCH (a)-[:extends*1..50]->(b) RETURN b").unwrap();
219 validate(&mut q).unwrap();
220 let edge = q.pattern.edges().next().unwrap();
221 assert_eq!(edge.max_hops, MAX_DEPTH);
222 assert!(edge.min_hops <= edge.max_hops);
223 }
224
225 #[test]
226 fn multi_relation_all_normalised() {
227 let mut q = gql::parse("MATCH (a)-[:Extends|VARIANT_OF]->(b) RETURN a").unwrap();
228 validate(&mut q).unwrap();
229 let edge = q.pattern.edges().next().unwrap();
230 assert_eq!(
231 edge.relations,
232 vec!["extends".to_string(), "variant_of".to_string()]
233 );
234 }
235
236 #[test]
237 fn rejects_namespace_in_where() {
238 let mut q =
239 gql::parse("MATCH (a:concept)-[:extends]->(b) WHERE a.namespace = 'other' RETURN a")
240 .unwrap();
241 let err = validate(&mut q).unwrap_err();
242 assert!(err.to_string().contains("namespace"), "msg: {err}");
243 }
244
245 #[test]
246 fn rejects_namespace_in_node_properties() {
247 let mut q =
248 gql::parse("MATCH (a:concept {namespace: 'other'})-[:extends]->(b) RETURN a").unwrap();
249 let err = validate(&mut q).unwrap_err();
250 assert!(err.to_string().contains("namespace"), "msg: {err}");
251 }
252
253 #[test]
254 fn rejects_unknown_relation_in_where() {
255 let mut q =
256 gql::parse("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'related_to' RETURN a")
257 .unwrap();
258 let err = validate(&mut q).unwrap_err();
259 assert!(err.to_string().contains("related_to"), "msg: {err}");
260 }
261
262 #[test]
263 fn unknown_kind_in_where_passes_through() {
264 let mut q =
266 gql::parse("MATCH (a)-[:extends]->(b) WHERE a.kind = 'gizmo' RETURN a").unwrap();
267 validate(&mut q).unwrap();
268 let val = match &q.where_clause[0].value {
269 ConditionValue::String(s) => s.clone(),
270 _ => panic!("expected string"),
271 };
272 assert_eq!(val, "gizmo");
273 }
274
275 #[test]
276 fn kind_in_where_passes_through_unchanged() {
277 let mut q =
279 gql::parse("MATCH (a)-[:extends]->(b) WHERE a.kind = 'paper' RETURN a").unwrap();
280 validate(&mut q).unwrap();
281 let val = match &q.where_clause[0].value {
282 ConditionValue::String(s) => s.clone(),
283 _ => panic!("expected string"),
284 };
285 assert_eq!(val, "paper");
286 }
287
288 #[test]
289 fn normalises_relation_alias_in_where() {
290 let mut q =
291 gql::parse("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'Introduced_By' RETURN a")
292 .unwrap();
293 validate(&mut q).unwrap();
294 let val = match &q.where_clause[0].value {
295 ConditionValue::String(s) => s.clone(),
296 _ => panic!("expected string"),
297 };
298 assert_eq!(val, "introduced_by");
299 }
300
301 #[test]
302 fn rejects_zero_hop_range_gql_wide() {
303 let mut q = gql::parse("MATCH (a)-[:extends*0..3]->(b) RETURN b").unwrap();
304 let err = validate(&mut q).unwrap_err();
305 assert!(
306 matches!(err, QueryError::Unsupported(_)),
307 "expected Unsupported, got {err:?}"
308 );
309 }
310
311 #[test]
312 fn rejects_zero_hop_range_gql_narrow() {
313 let mut q = gql::parse("MATCH (a)-[:extends*0..1]->(b) RETURN b").unwrap();
317 let err = validate(&mut q).unwrap_err();
318 assert!(
319 matches!(err, QueryError::Unsupported(_)),
320 "expected Unsupported, got {err:?}"
321 );
322 }
323
324 #[test]
325 fn rejects_zero_hop_sparql_explicit_range() {
326 use crate::parsers::sparql;
327 let mut q = sparql::parse("SELECT ?a ?b WHERE { ?a :extends{0,3} ?b . }").unwrap();
328 let err = validate(&mut q).unwrap_err();
329 assert!(
330 matches!(err, QueryError::Unsupported(_)),
331 "expected Unsupported, got {err:?}"
332 );
333 }
334
335 #[test]
336 fn rejects_repeated_node_var_cycle_gql() {
337 let mut q = gql::parse("MATCH (a)-[:extends]->(b)-[:variant_of]->(a) RETURN a").unwrap();
338 let err = validate(&mut q).unwrap_err();
339 assert!(
340 matches!(err, QueryError::Unsupported(_)),
341 "expected Unsupported, got {err:?}"
342 );
343 }
344
345 #[test]
346 fn rejects_repeated_node_var_self_reach_variable_length() {
347 let mut q = gql::parse("MATCH (a)-[:extends*1..3]->(a) RETURN a").unwrap();
348 let err = validate(&mut q).unwrap_err();
349 assert!(
350 matches!(err, QueryError::Unsupported(_)),
351 "expected Unsupported, got {err:?}"
352 );
353 }
354
355 #[test]
356 fn rejects_repeated_node_var_cycle_sparql() {
357 use crate::parsers::sparql;
358 let mut q =
359 sparql::parse("SELECT ?a WHERE { ?a :extends ?b . ?b :variant_of ?a . }").unwrap();
360 let err = validate(&mut q).unwrap_err();
361 assert!(
362 matches!(err, QueryError::Unsupported(_)),
363 "expected Unsupported, got {err:?}"
364 );
365 }
366
367 #[test]
368 fn rejects_repeated_edge_var() {
369 let mut q = gql::parse("MATCH (a)-[e:extends]->(b)-[e:variant_of]->(c) RETURN c").unwrap();
370 let err = validate(&mut q).unwrap_err();
371 assert!(
372 matches!(err, QueryError::Unsupported(_)),
373 "expected Unsupported, got {err:?}"
374 );
375 }
376
377 #[test]
378 fn rejects_inverted_range() {
379 let mut q = gql::parse("MATCH (a)-[:extends*3..1]->(b) RETURN b").unwrap();
381 let err = validate(&mut q).unwrap_err();
382 assert!(
383 matches!(err, QueryError::Validation(_)),
384 "expected Validation error, got {err:?}"
385 );
386 }
387
388 #[test]
389 fn rejects_min_hops_above_depth_cap() {
390 let mut q = gql::parse("MATCH (a)-[:extends*50..100]->(b) RETURN b").unwrap();
393 let err = validate(&mut q).unwrap_err();
394 assert!(
395 matches!(err, QueryError::Unsupported(_)),
396 "expected Unsupported, got {err:?}"
397 );
398 }
399
400 #[test]
401 fn clamps_max_but_keeps_satisfiable_min() {
402 let mut q = gql::parse("MATCH (a)-[:extends*2..50]->(b) RETURN b").unwrap();
404 validate(&mut q).unwrap();
405 let edge = q.pattern.edges().next().unwrap();
406 assert_eq!(edge.min_hops, 2);
407 assert_eq!(edge.max_hops, MAX_DEPTH);
408 }
409
410 #[test]
411 fn node_property_named_relation_allowed() {
412 let mut q =
415 gql::parse("MATCH (a)-[:extends]->(b) WHERE a.relation = 'external' RETURN a").unwrap();
416 validate(&mut q).unwrap();
417 let val = match &q.where_clause[0].value {
418 ConditionValue::String(s) => s.clone(),
419 _ => panic!("expected string"),
420 };
421 assert_eq!(val, "external");
422 }
423
424 #[test]
425 fn edge_relation_still_validated() {
426 let mut q = gql::parse("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'not_real' RETURN a")
429 .unwrap();
430 let err = validate(&mut q).unwrap_err();
431 assert!(err.to_string().contains("not_real"), "msg: {err}");
432 }
433}