1use std::collections::HashSet;
23use std::str::FromStr;
24
25use khive_types::{EdgeRelation, EntityKind};
26
27use crate::ast::{Condition, ConditionValue, GqlQuery, PatternElement};
28use crate::error::QueryError;
29
30pub const MAX_DEPTH: usize = 10;
32
33pub fn validate(query: &mut GqlQuery) -> Result<(), QueryError> {
39 let mut seen_node_vars: HashSet<&str> = HashSet::new();
44 let mut seen_edge_vars: HashSet<&str> = HashSet::new();
45 for element in &query.pattern.elements {
46 match element {
47 PatternElement::Node(node) => {
48 if let Some(var) = node.variable.as_deref() {
49 if !seen_node_vars.insert(var) {
50 return Err(QueryError::Unsupported(format!(
51 "repeated node variable '{var}' (cycle / self-reachability \
52 requires alias-equality predicates not yet implemented)"
53 )));
54 }
55 }
56 }
57 PatternElement::Edge(edge) => {
58 if let Some(var) = edge.variable.as_deref() {
59 if !seen_edge_vars.insert(var) {
60 return Err(QueryError::Unsupported(format!(
61 "repeated edge variable '{var}' not supported"
62 )));
63 }
64 }
65 }
66 }
67 }
68
69 for element in &mut query.pattern.elements {
70 match element {
71 PatternElement::Node(node) => {
72 if let Some(kind) = node.kind.as_mut() {
73 let parsed = EntityKind::from_str(kind).map_err(QueryError::Validation)?;
74 *kind = parsed.name().to_string();
75 }
76 if node.properties.contains_key("namespace") {
77 return Err(QueryError::Validation(
78 "namespace is set by CompileOptions, not query text".into(),
79 ));
80 }
81 }
82 PatternElement::Edge(edge) => {
83 for relation in edge.relations.iter_mut() {
84 let parsed = EdgeRelation::from_str(relation)
85 .map_err(|err| QueryError::Validation(err.to_string()))?;
86 *relation = parsed.as_str().to_string();
87 }
88 if edge.min_hops == 0 {
89 return Err(QueryError::Unsupported(
90 "zero-hop ranges (min_hops = 0) not yet supported; \
91 use a minimum of 1 hop"
92 .into(),
93 ));
94 }
95 if edge.min_hops > edge.max_hops {
98 return Err(QueryError::Validation(format!(
99 "invalid hop range: min {} > max {}",
100 edge.min_hops, edge.max_hops
101 )));
102 }
103 if edge.min_hops > MAX_DEPTH {
107 return Err(QueryError::Unsupported(format!(
108 "minimum hop count {} exceeds depth cap {}",
109 edge.min_hops, MAX_DEPTH
110 )));
111 }
112 if edge.max_hops > MAX_DEPTH {
115 edge.max_hops = MAX_DEPTH;
116 }
117 }
118 }
119 }
120
121 let mut var_kinds: std::collections::HashMap<&str, VarKind> = std::collections::HashMap::new();
126 for element in &query.pattern.elements {
127 match element {
128 PatternElement::Node(n) => {
129 if let Some(v) = n.variable.as_deref() {
130 var_kinds.insert(v, VarKind::Node);
131 }
132 }
133 PatternElement::Edge(e) => {
134 if let Some(v) = e.variable.as_deref() {
135 var_kinds.insert(v, VarKind::Edge);
136 }
137 }
138 }
139 }
140
141 for cond in query.where_clause.iter_mut() {
142 let is_edge = var_kinds
143 .get(cond.variable.as_str())
144 .copied()
145 .unwrap_or(VarKind::Node)
146 == VarKind::Edge;
147 validate_condition(cond, is_edge)?;
148 }
149
150 Ok(())
151}
152
153#[derive(Clone, Copy, PartialEq, Eq)]
154enum VarKind {
155 Node,
156 Edge,
157}
158
159fn validate_condition(cond: &mut Condition, is_edge: bool) -> Result<(), QueryError> {
160 match cond.property.as_str() {
161 "namespace" => Err(QueryError::Validation(
162 "namespace is set by CompileOptions, not query text".into(),
163 )),
164 "kind" if !is_edge => {
165 if let ConditionValue::String(ref mut s) = cond.value {
166 let parsed = EntityKind::from_str(s).map_err(QueryError::Validation)?;
167 *s = parsed.name().to_string();
168 }
169 Ok(())
170 }
171 "relation" if is_edge => {
172 if let ConditionValue::String(ref mut s) = cond.value {
173 let parsed = EdgeRelation::from_str(s)
174 .map_err(|err| QueryError::Validation(err.to_string()))?;
175 *s = parsed.as_str().to_string();
176 }
177 Ok(())
178 }
179 _ => Ok(()),
180 }
181}
182
183#[cfg(test)]
184mod tests {
185 use super::*;
186 use crate::parsers::gql;
187
188 #[test]
189 fn normalises_entity_kind_aliases() {
190 let mut q = gql::parse("MATCH (a:paper)-[:introduced_by]->(b:concept) RETURN a").unwrap();
191 validate(&mut q).unwrap();
192 let kinds: Vec<_> = q
193 .pattern
194 .nodes()
195 .map(|n| n.kind.as_deref().unwrap_or(""))
196 .collect();
197 assert_eq!(kinds, vec!["document", "concept"]);
198 }
199
200 #[test]
201 fn normalises_relation_case_and_hyphens() {
202 let mut q = gql::parse("MATCH (a)-[:Introduced_By]->(b) RETURN a").unwrap();
203 validate(&mut q).unwrap();
204 let rels: Vec<_> = q
205 .pattern
206 .edges()
207 .flat_map(|e| e.relations.iter().cloned())
208 .collect();
209 assert_eq!(rels, vec!["introduced_by".to_string()]);
210 }
211
212 #[test]
213 fn rejects_unknown_relation() {
214 let mut q = gql::parse("MATCH (a)-[:not_a_relation]->(b) RETURN a").unwrap();
215 let err = validate(&mut q).unwrap_err();
216 let msg = err.to_string();
217 assert!(msg.contains("not_a_relation"), "msg: {msg}");
218 }
219
220 #[test]
221 fn rejects_unknown_kind() {
222 let mut q = gql::parse("MATCH (a:gizmo)-[:extends]->(b) RETURN a").unwrap();
223 let err = validate(&mut q).unwrap_err();
224 let msg = err.to_string();
225 assert!(msg.contains("gizmo"), "msg: {msg}");
226 }
227
228 #[test]
229 fn clamps_depth_above_max() {
230 let mut q = gql::parse("MATCH (a)-[:extends*1..50]->(b) RETURN b").unwrap();
231 validate(&mut q).unwrap();
232 let edge = q.pattern.edges().next().unwrap();
233 assert_eq!(edge.max_hops, MAX_DEPTH);
234 assert!(edge.min_hops <= edge.max_hops);
235 }
236
237 #[test]
238 fn multi_relation_all_normalised() {
239 let mut q = gql::parse("MATCH (a)-[:Extends|VARIANT_OF]->(b) RETURN a").unwrap();
240 validate(&mut q).unwrap();
241 let edge = q.pattern.edges().next().unwrap();
242 assert_eq!(
243 edge.relations,
244 vec!["extends".to_string(), "variant_of".to_string()]
245 );
246 }
247
248 #[test]
249 fn rejects_namespace_in_where() {
250 let mut q =
251 gql::parse("MATCH (a:concept)-[:extends]->(b) WHERE a.namespace = 'other' RETURN a")
252 .unwrap();
253 let err = validate(&mut q).unwrap_err();
254 assert!(err.to_string().contains("namespace"), "msg: {err}");
255 }
256
257 #[test]
258 fn rejects_namespace_in_node_properties() {
259 let mut q =
260 gql::parse("MATCH (a:concept {namespace: 'other'})-[:extends]->(b) RETURN a").unwrap();
261 let err = validate(&mut q).unwrap_err();
262 assert!(err.to_string().contains("namespace"), "msg: {err}");
263 }
264
265 #[test]
266 fn rejects_unknown_relation_in_where() {
267 let mut q =
268 gql::parse("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'related_to' RETURN a")
269 .unwrap();
270 let err = validate(&mut q).unwrap_err();
271 assert!(err.to_string().contains("related_to"), "msg: {err}");
272 }
273
274 #[test]
275 fn rejects_unknown_kind_in_where() {
276 let mut q =
277 gql::parse("MATCH (a)-[:extends]->(b) WHERE a.kind = 'gizmo' RETURN a").unwrap();
278 let err = validate(&mut q).unwrap_err();
279 assert!(err.to_string().contains("gizmo"), "msg: {err}");
280 }
281
282 #[test]
283 fn normalises_kind_alias_in_where() {
284 let mut q =
285 gql::parse("MATCH (a)-[:extends]->(b) WHERE a.kind = 'paper' RETURN a").unwrap();
286 validate(&mut q).unwrap();
287 let val = match &q.where_clause[0].value {
288 ConditionValue::String(s) => s.clone(),
289 _ => panic!("expected string"),
290 };
291 assert_eq!(val, "document");
292 }
293
294 #[test]
295 fn normalises_relation_alias_in_where() {
296 let mut q =
297 gql::parse("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'Introduced_By' RETURN a")
298 .unwrap();
299 validate(&mut q).unwrap();
300 let val = match &q.where_clause[0].value {
301 ConditionValue::String(s) => s.clone(),
302 _ => panic!("expected string"),
303 };
304 assert_eq!(val, "introduced_by");
305 }
306
307 #[test]
308 fn rejects_zero_hop_range_gql_wide() {
309 let mut q = gql::parse("MATCH (a)-[:extends*0..3]->(b) RETURN b").unwrap();
310 let err = validate(&mut q).unwrap_err();
311 assert!(
312 matches!(err, QueryError::Unsupported(_)),
313 "expected Unsupported, got {err:?}"
314 );
315 }
316
317 #[test]
318 fn rejects_zero_hop_range_gql_narrow() {
319 let mut q = gql::parse("MATCH (a)-[:extends*0..1]->(b) RETURN b").unwrap();
323 let err = validate(&mut q).unwrap_err();
324 assert!(
325 matches!(err, QueryError::Unsupported(_)),
326 "expected Unsupported, got {err:?}"
327 );
328 }
329
330 #[test]
331 fn rejects_zero_hop_sparql_explicit_range() {
332 use crate::parsers::sparql;
333 let mut q = sparql::parse("SELECT ?a ?b WHERE { ?a :extends{0,3} ?b . }").unwrap();
334 let err = validate(&mut q).unwrap_err();
335 assert!(
336 matches!(err, QueryError::Unsupported(_)),
337 "expected Unsupported, got {err:?}"
338 );
339 }
340
341 #[test]
342 fn rejects_repeated_node_var_cycle_gql() {
343 let mut q = gql::parse("MATCH (a)-[:extends]->(b)-[:variant_of]->(a) RETURN a").unwrap();
344 let err = validate(&mut q).unwrap_err();
345 assert!(
346 matches!(err, QueryError::Unsupported(_)),
347 "expected Unsupported, got {err:?}"
348 );
349 }
350
351 #[test]
352 fn rejects_repeated_node_var_self_reach_variable_length() {
353 let mut q = gql::parse("MATCH (a)-[:extends*1..3]->(a) RETURN a").unwrap();
354 let err = validate(&mut q).unwrap_err();
355 assert!(
356 matches!(err, QueryError::Unsupported(_)),
357 "expected Unsupported, got {err:?}"
358 );
359 }
360
361 #[test]
362 fn rejects_repeated_node_var_cycle_sparql() {
363 use crate::parsers::sparql;
364 let mut q =
365 sparql::parse("SELECT ?a WHERE { ?a :extends ?b . ?b :variant_of ?a . }").unwrap();
366 let err = validate(&mut q).unwrap_err();
367 assert!(
368 matches!(err, QueryError::Unsupported(_)),
369 "expected Unsupported, got {err:?}"
370 );
371 }
372
373 #[test]
374 fn rejects_repeated_edge_var() {
375 let mut q = gql::parse("MATCH (a)-[e:extends]->(b)-[e:variant_of]->(c) RETURN c").unwrap();
376 let err = validate(&mut q).unwrap_err();
377 assert!(
378 matches!(err, QueryError::Unsupported(_)),
379 "expected Unsupported, got {err:?}"
380 );
381 }
382
383 #[test]
384 fn rejects_inverted_range() {
385 let mut q = gql::parse("MATCH (a)-[:extends*3..1]->(b) RETURN b").unwrap();
387 let err = validate(&mut q).unwrap_err();
388 assert!(
389 matches!(err, QueryError::Validation(_)),
390 "expected Validation error, got {err:?}"
391 );
392 }
393
394 #[test]
395 fn rejects_min_hops_above_depth_cap() {
396 let mut q = gql::parse("MATCH (a)-[:extends*50..100]->(b) RETURN b").unwrap();
399 let err = validate(&mut q).unwrap_err();
400 assert!(
401 matches!(err, QueryError::Unsupported(_)),
402 "expected Unsupported, got {err:?}"
403 );
404 }
405
406 #[test]
407 fn clamps_max_but_keeps_satisfiable_min() {
408 let mut q = gql::parse("MATCH (a)-[:extends*2..50]->(b) RETURN b").unwrap();
410 validate(&mut q).unwrap();
411 let edge = q.pattern.edges().next().unwrap();
412 assert_eq!(edge.min_hops, 2);
413 assert_eq!(edge.max_hops, MAX_DEPTH);
414 }
415
416 #[test]
417 fn node_property_named_relation_allowed() {
418 let mut q =
421 gql::parse("MATCH (a)-[:extends]->(b) WHERE a.relation = 'external' RETURN a").unwrap();
422 validate(&mut q).unwrap();
423 let val = match &q.where_clause[0].value {
424 ConditionValue::String(s) => s.clone(),
425 _ => panic!("expected string"),
426 };
427 assert_eq!(val, "external");
428 }
429
430 #[test]
431 fn edge_relation_still_validated() {
432 let mut q = gql::parse("MATCH (a)-[e:extends]->(b) WHERE e.relation = 'not_real' RETURN a")
435 .unwrap();
436 let err = validate(&mut q).unwrap_err();
437 assert!(err.to_string().contains("not_real"), "msg: {err}");
438 }
439}