1use std::collections::HashSet;
22use std::str::FromStr;
23
24use khive_types::EdgeRelation;
25
26use crate::ast::{Condition, ConditionValue, GqlQuery, PatternElement};
27use crate::error::QueryError;
28
/// Largest hop count a pattern edge may request.
///
/// `validate_with_warnings` rejects any edge whose `min_hops` or `max_hops` is greater than
/// this value.
pub const MAX_DEPTH: usize = 10;
31
32pub fn validate(query: &mut GqlQuery) -> Result<(), QueryError> {
37 validate_with_warnings(query).map(|_| ())
38}
39
40pub fn validate_with_warnings(query: &mut GqlQuery) -> Result<Vec<String>, QueryError> {
46 let warnings: Vec<String> = Vec::new();
47
48 let mut seen_node_vars: HashSet<&str> = HashSet::new();
53 let mut seen_edge_vars: HashSet<&str> = HashSet::new();
54 for element in &query.pattern.elements {
55 match element {
56 PatternElement::Node(node) => {
57 if let Some(var) = node.variable.as_deref() {
58 if !seen_node_vars.insert(var) {
59 return Err(QueryError::Unsupported(format!(
60 "repeated node variable '{var}' (cycle / self-reachability \
61 requires alias-equality predicates not yet implemented)"
62 )));
63 }
64 }
65 }
66 PatternElement::Edge(edge) => {
67 if let Some(var) = edge.variable.as_deref() {
68 if !seen_edge_vars.insert(var) {
69 return Err(QueryError::Unsupported(format!(
70 "repeated edge variable '{var}' not supported"
71 )));
72 }
73 }
74 }
75 }
76 }
77
78 for element in &mut query.pattern.elements {
79 match element {
80 PatternElement::Node(node) => {
81 if node.properties.contains_key("namespace") {
82 return Err(QueryError::Validation(
83 "namespace is set by CompileOptions, not query text".into(),
84 ));
85 }
86 }
87 PatternElement::Edge(edge) => {
88 for relation in edge.relations.iter_mut() {
89 if relation.starts_with("observed_as_") {
94 continue;
95 }
96 let parsed = EdgeRelation::from_str(relation)
97 .map_err(|err| QueryError::Validation(err.to_string()))?;
98 *relation = parsed.as_str().to_string();
99 }
100 if edge.min_hops == 0 {
101 return Err(QueryError::Unsupported(
102 "zero-hop ranges (min_hops = 0) not yet supported; \
103 use a minimum of 1 hop"
104 .into(),
105 ));
106 }
107 if edge.min_hops > edge.max_hops {
110 return Err(QueryError::Validation(format!(
111 "invalid hop range: min {} > max {}",
112 edge.min_hops, edge.max_hops
113 )));
114 }
115 if edge.min_hops > MAX_DEPTH {
119 return Err(QueryError::Unsupported(format!(
120 "minimum hop count {} exceeds depth cap {}",
121 edge.min_hops, MAX_DEPTH
122 )));
123 }
124 if edge.max_hops > MAX_DEPTH {
126 return Err(QueryError::InvalidInput(format!(
127 "max_hops {} exceeds the depth cap of {}; reduce the range or use a smaller bound",
128 edge.max_hops, MAX_DEPTH
129 )));
130 }
131 }
132 }
133 }
134
135 let mut var_kinds: std::collections::HashMap<&str, VarKind> = std::collections::HashMap::new();
140 for element in &query.pattern.elements {
141 match element {
142 PatternElement::Node(n) => {
143 if let Some(v) = n.variable.as_deref() {
144 var_kinds.insert(v, VarKind::Node);
145 }
146 }
147 PatternElement::Edge(e) => {
148 if let Some(v) = e.variable.as_deref() {
149 var_kinds.insert(v, VarKind::Edge);
150 }
151 }
152 }
153 }
154
155 let mut validate_err: Option<QueryError> = None;
157 query.where_clause.for_each_condition_mut(&mut |cond| {
158 if validate_err.is_some() {
159 return;
160 }
161 let is_edge = var_kinds
162 .get(cond.variable.as_str())
163 .copied()
164 .unwrap_or(VarKind::Node)
165 == VarKind::Edge;
166 if let Err(e) = validate_condition(cond, is_edge) {
167 validate_err = Some(e);
168 }
169 });
170 if let Some(e) = validate_err {
171 return Err(e);
172 }
173
174 Ok(warnings)
175}
176
/// Which kind of pattern element bound a variable.
///
/// `validate_with_warnings` records this per variable so it can tell whether a WHERE
/// condition refers to an edge.
#[derive(Clone, Copy, PartialEq, Eq)]
enum VarKind {
    /// The variable was bound by a `PatternElement::Node`.
    Node,
    /// The variable was bound by a `PatternElement::Edge`.
    Edge,
}
182
183fn validate_condition(cond: &mut Condition, is_edge: bool) -> Result<(), QueryError> {
184 match cond.property.as_str() {
185 "namespace" => Err(QueryError::Validation(
186 "namespace is set by CompileOptions, not query text".into(),
187 )),
188 "kind" if !is_edge => Ok(()),
189 "relation" if is_edge => {
190 if let ConditionValue::String(ref mut s) = cond.value {
191 let parsed = EdgeRelation::from_str(s)
192 .map_err(|err| QueryError::Validation(err.to_string()))?;
193 *s = parsed.as_str().to_string();
194 }
195 Ok(())
196 }
197 _ => Ok(()),
198 }
199}
200
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsers::gql;
    use crate::parsers::sparql;

    /// Parses `text` as GQL and validates it, panicking if either step fails.
    fn validated_gql(text: &str) -> GqlQuery {
        let mut query = gql::parse(text).unwrap();
        validate(&mut query).unwrap();
        query
    }

    /// Parses `text` as GQL and returns the error `validate` reports for it.
    fn gql_rejection(text: &str) -> QueryError {
        let mut query = gql::parse(text).unwrap();
        validate(&mut query).unwrap_err()
    }

    /// Parses `text` as SPARQL and returns the error `validate` reports for it.
    fn sparql_rejection(text: &str) -> QueryError {
        let mut query = sparql::parse(text).unwrap();
        validate(&mut query).unwrap_err()
    }

    /// Asserts that `err` is the `Unsupported` variant.
    fn assert_unsupported(err: &QueryError) {
        assert!(
            matches!(err, QueryError::Unsupported(_)),
            "expected Unsupported, got {err:?}"
        );
    }

    /// Asserts that `err` is the `InvalidInput` variant.
    fn assert_invalid_input(err: &QueryError) {
        assert!(
            matches!(err, QueryError::InvalidInput(_)),
            "expected InvalidInput, got {err:?}"
        );
    }

    /// Asserts that the rendered error text contains `needle`.
    fn assert_mentions(err: &QueryError, needle: &str) {
        assert!(err.to_string().contains(needle), "msg: {err}");
    }

    /// Returns the string value of the first WHERE condition, panicking if it is not a string.
    fn first_condition_string_value(q: &GqlQuery) -> String {
        let first = q.where_clause.conditions().next().unwrap();
        if let ConditionValue::String(text) = &first.value {
            text.clone()
        } else {
            panic!("expected string condition value")
        }
    }

    #[test]
    fn node_kind_passes_through_unchanged() {
        let q = validated_gql("MATCH (a:paper)-[:introduced_by]->(b:concept) RETURN a");
        let kinds: Vec<_> = q
            .pattern
            .nodes()
            .map(|n| n.kind.as_deref().unwrap_or(""))
            .collect();
        assert_eq!(kinds, vec!["paper", "concept"]);
    }

    #[test]
    fn normalises_relation_case_and_hyphens() {
        let q = validated_gql("MATCH (a)-[:Introduced_By]->(b) RETURN a");
        let mut rels = Vec::new();
        for edge in q.pattern.edges() {
            rels.extend(edge.relations.iter().cloned());
        }
        assert_eq!(rels, vec!["introduced_by".to_string()]);
    }

    #[test]
    fn rejects_unknown_relation() {
        let err = gql_rejection("MATCH (a)-[:not_a_relation]->(b) RETURN a");
        assert_mentions(&err, "not_a_relation");
    }

    #[test]
    fn unknown_kind_passes_through() {
        let _ = validated_gql("MATCH (a:gizmo)-[:extends]->(b) RETURN a");
    }

    #[test]
    fn rejects_depth_above_max() {
        let err = gql_rejection("MATCH (a)-[:extends*1..50]->(b) RETURN b");
        assert_invalid_input(&err);
        assert!(
            err.to_string().contains("50"),
            "error should mention requested depth: {err}"
        );
    }

    #[test]
    fn rejects_depth_above_max_warnings_path() {
        // Goes through `validate_with_warnings` directly rather than the `validate` wrapper.
        let mut q = gql::parse("MATCH (a)-[:extends*1..50]->(b) RETURN b").unwrap();
        let err = validate_with_warnings(&mut q).unwrap_err();
        assert_invalid_input(&err);
    }

    #[test]
    fn multi_relation_all_normalised() {
        let q = validated_gql("MATCH (a)-[:Extends|VARIANT_OF]->(b) RETURN a");
        let edge = q.pattern.edges().next().unwrap();
        let expected = vec!["extends".to_string(), "variant_of".to_string()];
        assert_eq!(edge.relations, expected);
    }

    #[test]
    fn rejects_namespace_in_where() {
        let err = gql_rejection(
            "MATCH (a:concept)-[:extends]->(b) WHERE a.namespace = 'other' RETURN a",
        );
        assert_mentions(&err, "namespace");
    }

    #[test]
    fn rejects_namespace_in_node_properties() {
        let err =
            gql_rejection("MATCH (a:concept {namespace: 'other'})-[:extends]->(b) RETURN a");
        assert_mentions(&err, "namespace");
    }

    #[test]
    fn rejects_unknown_relation_in_where() {
        let err =
            gql_rejection("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'related_to' RETURN a");
        assert_mentions(&err, "related_to");
    }

    #[test]
    fn unknown_kind_in_where_passes_through() {
        let q = validated_gql("MATCH (a)-[:extends]->(b) WHERE a.kind = 'gizmo' RETURN a");
        assert_eq!(first_condition_string_value(&q), "gizmo");
    }

    #[test]
    fn kind_in_where_passes_through_unchanged() {
        let q = validated_gql("MATCH (a)-[:extends]->(b) WHERE a.kind = 'paper' RETURN a");
        assert_eq!(first_condition_string_value(&q), "paper");
    }

    #[test]
    fn normalises_relation_alias_in_where() {
        let q = validated_gql(
            "MATCH (a)-[e:extends]->(b) WHERE e.relation = 'Introduced_By' RETURN a",
        );
        assert_eq!(first_condition_string_value(&q), "introduced_by");
    }

    #[test]
    fn rejects_zero_hop_range_gql_wide() {
        let err = gql_rejection("MATCH (a)-[:extends*0..3]->(b) RETURN b");
        assert_unsupported(&err);
    }

    #[test]
    fn rejects_zero_hop_range_gql_narrow() {
        let err = gql_rejection("MATCH (a)-[:extends*0..1]->(b) RETURN b");
        assert_unsupported(&err);
    }

    #[test]
    fn rejects_zero_hop_sparql_explicit_range() {
        let err = sparql_rejection("SELECT ?a ?b WHERE { ?a :extends{0,3} ?b . }");
        assert_unsupported(&err);
    }

    #[test]
    fn rejects_repeated_node_var_cycle_gql() {
        let err = gql_rejection("MATCH (a)-[:extends]->(b)-[:variant_of]->(a) RETURN a");
        assert_unsupported(&err);
    }

    #[test]
    fn rejects_repeated_node_var_self_reach_variable_length() {
        let err = gql_rejection("MATCH (a)-[:extends*1..3]->(a) RETURN a");
        assert_unsupported(&err);
    }

    #[test]
    fn rejects_repeated_node_var_cycle_sparql() {
        let err =
            sparql_rejection("SELECT ?a WHERE { ?a :extends ?b . ?b :variant_of ?a . }");
        assert_unsupported(&err);
    }

    #[test]
    fn rejects_repeated_edge_var() {
        let err = gql_rejection("MATCH (a)-[e:extends]->(b)-[e:variant_of]->(c) RETURN c");
        assert_unsupported(&err);
    }

    #[test]
    fn rejects_inverted_range() {
        let err = gql_rejection("MATCH (a)-[:extends*3..1]->(b) RETURN b");
        assert!(
            matches!(err, QueryError::Validation(_)),
            "expected Validation error, got {err:?}"
        );
    }

    #[test]
    fn rejects_min_hops_above_depth_cap() {
        let err = gql_rejection("MATCH (a)-[:extends*50..100]->(b) RETURN b");
        assert_unsupported(&err);
    }

    #[test]
    fn rejects_max_above_depth_cap_with_satisfiable_min() {
        let err = gql_rejection("MATCH (a)-[:extends*2..50]->(b) RETURN b");
        assert_invalid_input(&err);
    }

    #[test]
    fn node_property_named_relation_allowed() {
        // `relation` on a node variable is an ordinary property and is not parsed.
        let q = validated_gql("MATCH (a)-[:extends]->(b) WHERE a.relation = 'external' RETURN a");
        assert_eq!(first_condition_string_value(&q), "external");
    }

    #[test]
    fn edge_relation_still_validated() {
        let err =
            gql_rejection("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'not_real' RETURN a");
        assert_mentions(&err, "not_real");
    }
}