1use std::collections::HashSet;
22use std::str::FromStr;
23
24use khive_types::EdgeRelation;
25
26use crate::ast::{Condition, ConditionValue, GqlQuery, PatternElement};
27use crate::error::QueryError;
28
/// Depth cap for variable-length edge patterns.
///
/// The validator in this file rejects a hop range whose minimum exceeds this
/// value and lowers a maximum that exceeds it down to this value (emitting a
/// warning).
pub const MAX_DEPTH: usize = 10;
31
32pub fn validate(query: &mut GqlQuery) -> Result<(), QueryError> {
37 validate_with_warnings(query).map(|_| ())
38}
39
40pub fn validate_with_warnings(query: &mut GqlQuery) -> Result<Vec<String>, QueryError> {
44 let mut warnings = Vec::new();
45
46 let mut seen_node_vars: HashSet<&str> = HashSet::new();
51 let mut seen_edge_vars: HashSet<&str> = HashSet::new();
52 for element in &query.pattern.elements {
53 match element {
54 PatternElement::Node(node) => {
55 if let Some(var) = node.variable.as_deref() {
56 if !seen_node_vars.insert(var) {
57 return Err(QueryError::Unsupported(format!(
58 "repeated node variable '{var}' (cycle / self-reachability \
59 requires alias-equality predicates not yet implemented)"
60 )));
61 }
62 }
63 }
64 PatternElement::Edge(edge) => {
65 if let Some(var) = edge.variable.as_deref() {
66 if !seen_edge_vars.insert(var) {
67 return Err(QueryError::Unsupported(format!(
68 "repeated edge variable '{var}' not supported"
69 )));
70 }
71 }
72 }
73 }
74 }
75
76 for element in &mut query.pattern.elements {
77 match element {
78 PatternElement::Node(node) => {
79 if node.properties.contains_key("namespace") {
80 return Err(QueryError::Validation(
81 "namespace is set by CompileOptions, not query text".into(),
82 ));
83 }
84 }
85 PatternElement::Edge(edge) => {
86 for relation in edge.relations.iter_mut() {
87 let parsed = EdgeRelation::from_str(relation)
88 .map_err(|err| QueryError::Validation(err.to_string()))?;
89 *relation = parsed.as_str().to_string();
90 }
91 if edge.min_hops == 0 {
92 return Err(QueryError::Unsupported(
93 "zero-hop ranges (min_hops = 0) not yet supported; \
94 use a minimum of 1 hop"
95 .into(),
96 ));
97 }
98 if edge.min_hops > edge.max_hops {
101 return Err(QueryError::Validation(format!(
102 "invalid hop range: min {} > max {}",
103 edge.min_hops, edge.max_hops
104 )));
105 }
106 if edge.min_hops > MAX_DEPTH {
110 return Err(QueryError::Unsupported(format!(
111 "minimum hop count {} exceeds depth cap {}",
112 edge.min_hops, MAX_DEPTH
113 )));
114 }
115 if edge.max_hops > MAX_DEPTH {
117 let requested = edge.max_hops;
118 edge.max_hops = MAX_DEPTH;
119 warnings.push(format!(
120 "Query depth capped at {MAX_DEPTH} hops (requested {requested})"
121 ));
122 }
123 }
124 }
125 }
126
127 let mut var_kinds: std::collections::HashMap<&str, VarKind> = std::collections::HashMap::new();
132 for element in &query.pattern.elements {
133 match element {
134 PatternElement::Node(n) => {
135 if let Some(v) = n.variable.as_deref() {
136 var_kinds.insert(v, VarKind::Node);
137 }
138 }
139 PatternElement::Edge(e) => {
140 if let Some(v) = e.variable.as_deref() {
141 var_kinds.insert(v, VarKind::Edge);
142 }
143 }
144 }
145 }
146
147 for cond in query.where_clause.iter_mut() {
148 let is_edge = var_kinds
149 .get(cond.variable.as_str())
150 .copied()
151 .unwrap_or(VarKind::Node)
152 == VarKind::Edge;
153 validate_condition(cond, is_edge)?;
154 }
155
156 Ok(warnings)
157}
158
/// What a pattern variable is bound to; used to decide how a `WHERE`
/// condition on that variable is validated.
#[derive(Clone, Copy, PartialEq, Eq)]
enum VarKind {
    /// The variable names a node pattern element.
    Node,
    /// The variable names an edge pattern element.
    Edge,
}
164
165fn validate_condition(cond: &mut Condition, is_edge: bool) -> Result<(), QueryError> {
166 match cond.property.as_str() {
167 "namespace" => Err(QueryError::Validation(
168 "namespace is set by CompileOptions, not query text".into(),
169 )),
170 "kind" if !is_edge => Ok(()),
171 "relation" if is_edge => {
172 if let ConditionValue::String(ref mut s) = cond.value {
173 let parsed = EdgeRelation::from_str(s)
174 .map_err(|err| QueryError::Validation(err.to_string()))?;
175 *s = parsed.as_str().to_string();
176 }
177 Ok(())
178 }
179 _ => Ok(()),
180 }
181}
182
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsers::gql;

    /// Parses `text` as GQL, panicking if the parser rejects it.
    fn parse_gql(text: &str) -> GqlQuery {
        gql::parse(text).unwrap()
    }

    /// Parses `text` as GQL and validates it, panicking on any failure.
    fn validated(text: &str) -> GqlQuery {
        let mut query = parse_gql(text);
        validate(&mut query).unwrap();
        query
    }

    /// Parses `text` as GQL and returns the error validation produces.
    fn gql_err(text: &str) -> QueryError {
        let mut query = parse_gql(text);
        validate(&mut query).unwrap_err()
    }

    /// Parses `text` as SPARQL and returns the error validation produces.
    fn sparql_err(text: &str) -> QueryError {
        use crate::parsers::sparql;
        let mut query = sparql::parse(text).unwrap();
        validate(&mut query).unwrap_err()
    }

    /// Returns the string value of the first `WHERE` condition.
    fn first_where_string(query: &GqlQuery) -> &str {
        match &query.where_clause[0].value {
            ConditionValue::String(s) => s.as_str(),
            _ => panic!("expected string"),
        }
    }

    /// Asserts that `err` is the `Unsupported` variant.
    fn assert_unsupported(err: &QueryError) {
        assert!(
            matches!(err, QueryError::Unsupported(_)),
            "expected Unsupported, got {err:?}"
        );
    }

    // --- node kinds -------------------------------------------------------

    #[test]
    fn node_kind_passes_through_unchanged() {
        let q = validated("MATCH (a:paper)-[:introduced_by]->(b:concept) RETURN a");
        let kinds: Vec<_> = q
            .pattern
            .nodes()
            .map(|n| n.kind.as_deref().unwrap_or(""))
            .collect();
        assert_eq!(kinds, vec!["paper", "concept"]);
    }

    #[test]
    fn unknown_kind_passes_through() {
        validated("MATCH (a:gizmo)-[:extends]->(b) RETURN a");
    }

    // --- edge relations in the pattern ------------------------------------

    #[test]
    fn normalises_relation_case_and_hyphens() {
        let q = validated("MATCH (a)-[:Introduced_By]->(b) RETURN a");
        let rels: Vec<String> = q
            .pattern
            .edges()
            .flat_map(|e| e.relations.iter().cloned())
            .collect();
        assert_eq!(rels, ["introduced_by"]);
    }

    #[test]
    fn rejects_unknown_relation() {
        let err = gql_err("MATCH (a)-[:not_a_relation]->(b) RETURN a");
        let msg = err.to_string();
        assert!(msg.contains("not_a_relation"), "msg: {msg}");
    }

    #[test]
    fn multi_relation_all_normalised() {
        let q = validated("MATCH (a)-[:Extends|VARIANT_OF]->(b) RETURN a");
        let edge = q.pattern.edges().next().unwrap();
        assert_eq!(edge.relations, ["extends", "variant_of"]);
    }

    // --- depth clamping ---------------------------------------------------

    #[test]
    fn clamps_depth_above_max() {
        let q = validated("MATCH (a)-[:extends*1..50]->(b) RETURN b");
        let edge = q.pattern.edges().next().unwrap();
        assert_eq!(edge.max_hops, MAX_DEPTH);
        assert!(edge.min_hops <= edge.max_hops);
    }

    #[test]
    fn warns_when_clamping_depth_above_max() {
        let mut q = parse_gql("MATCH (a)-[:extends*1..50]->(b) RETURN b");
        let warnings = validate_with_warnings(&mut q).unwrap();
        assert_eq!(q.pattern.edges().next().unwrap().max_hops, MAX_DEPTH);
        assert!(
            warnings
                .iter()
                .any(|w| w.contains("Query depth capped at 10")),
            "warnings: {warnings:?}"
        );
    }

    #[test]
    fn clamps_max_but_keeps_satisfiable_min() {
        let q = validated("MATCH (a)-[:extends*2..50]->(b) RETURN b");
        let edge = q.pattern.edges().next().unwrap();
        assert_eq!(edge.min_hops, 2);
        assert_eq!(edge.max_hops, MAX_DEPTH);
    }

    // --- namespace is never accepted from query text ----------------------

    #[test]
    fn rejects_namespace_in_where() {
        let err =
            gql_err("MATCH (a:concept)-[:extends]->(b) WHERE a.namespace = 'other' RETURN a");
        assert!(err.to_string().contains("namespace"), "msg: {err}");
    }

    #[test]
    fn rejects_namespace_in_node_properties() {
        let err = gql_err("MATCH (a:concept {namespace: 'other'})-[:extends]->(b) RETURN a");
        assert!(err.to_string().contains("namespace"), "msg: {err}");
    }

    // --- WHERE conditions -------------------------------------------------

    #[test]
    fn rejects_unknown_relation_in_where() {
        let err = gql_err("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'related_to' RETURN a");
        assert!(err.to_string().contains("related_to"), "msg: {err}");
    }

    #[test]
    fn unknown_kind_in_where_passes_through() {
        let q = validated("MATCH (a)-[:extends]->(b) WHERE a.kind = 'gizmo' RETURN a");
        assert_eq!(first_where_string(&q), "gizmo");
    }

    #[test]
    fn kind_in_where_passes_through_unchanged() {
        let q = validated("MATCH (a)-[:extends]->(b) WHERE a.kind = 'paper' RETURN a");
        assert_eq!(first_where_string(&q), "paper");
    }

    #[test]
    fn normalises_relation_alias_in_where() {
        let q =
            validated("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'Introduced_By' RETURN a");
        assert_eq!(first_where_string(&q), "introduced_by");
    }

    #[test]
    fn node_property_named_relation_allowed() {
        // `relation` on a node variable is an ordinary property and is left as-is.
        let q = validated("MATCH (a)-[:extends]->(b) WHERE a.relation = 'external' RETURN a");
        assert_eq!(first_where_string(&q), "external");
    }

    #[test]
    fn edge_relation_still_validated() {
        let err = gql_err("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'not_real' RETURN a");
        assert!(err.to_string().contains("not_real"), "msg: {err}");
    }

    // --- zero-hop ranges --------------------------------------------------

    #[test]
    fn rejects_zero_hop_range_gql_wide() {
        assert_unsupported(&gql_err("MATCH (a)-[:extends*0..3]->(b) RETURN b"));
    }

    #[test]
    fn rejects_zero_hop_range_gql_narrow() {
        assert_unsupported(&gql_err("MATCH (a)-[:extends*0..1]->(b) RETURN b"));
    }

    #[test]
    fn rejects_zero_hop_sparql_explicit_range() {
        assert_unsupported(&sparql_err(
            "SELECT ?a ?b WHERE { ?a :extends{0,3} ?b . }",
        ));
    }

    // --- repeated variables -----------------------------------------------

    #[test]
    fn rejects_repeated_node_var_cycle_gql() {
        assert_unsupported(&gql_err(
            "MATCH (a)-[:extends]->(b)-[:variant_of]->(a) RETURN a",
        ));
    }

    #[test]
    fn rejects_repeated_node_var_self_reach_variable_length() {
        assert_unsupported(&gql_err("MATCH (a)-[:extends*1..3]->(a) RETURN a"));
    }

    #[test]
    fn rejects_repeated_node_var_cycle_sparql() {
        assert_unsupported(&sparql_err(
            "SELECT ?a WHERE { ?a :extends ?b . ?b :variant_of ?a . }",
        ));
    }

    #[test]
    fn rejects_repeated_edge_var() {
        assert_unsupported(&gql_err(
            "MATCH (a)-[e:extends]->(b)-[e:variant_of]->(c) RETURN c",
        ));
    }

    // --- hop-range bounds -------------------------------------------------

    #[test]
    fn rejects_inverted_range() {
        let err = gql_err("MATCH (a)-[:extends*3..1]->(b) RETURN b");
        assert!(
            matches!(err, QueryError::Validation(_)),
            "expected Validation error, got {err:?}"
        );
    }

    #[test]
    fn rejects_min_hops_above_depth_cap() {
        assert_unsupported(&gql_err("MATCH (a)-[:extends*50..100]->(b) RETURN b"));
    }
}