1use crate::algebra::{Aggregate, Algebra, Expression, Term, Variable};
11use crate::query_analysis::{ValidationError, ValidationErrorType};
12use anyhow::Result;
13use std::collections::HashSet;
14
15pub struct QueryValidator {
17 config: ValidationConfig,
19 stats: ValidationStatistics,
21}
22
/// Switches and limits that control what [`QueryValidator`] checks.
#[derive(Debug, Clone)]
pub struct ValidationConfig {
    /// Strict-compliance flag. NOTE(review): no check in this file reads it.
    pub strict_compliance: bool,
    /// When set, `validate` runs the performance checks (unbounded result
    /// sets, triple pattern count).
    pub performance_warnings: bool,
    /// When set, `validate` runs the security check pass.
    pub security_checks: bool,
    /// A complexity score above this value is reported as an error.
    pub max_complexity: usize,
    /// A triple pattern count above this value produces a performance warning.
    pub max_triple_patterns: usize,
    /// Maximum path depth. NOTE(review): no check in this file reads it.
    pub max_path_depth: usize,
    /// When set, `validate` warns about joins whose sides share no variables.
    pub warn_cartesian_products: bool,
    /// When set, `validate` runs the type-consistency pass.
    pub check_type_consistency: bool,
}
43
44impl Default for ValidationConfig {
45 fn default() -> Self {
46 Self {
47 strict_compliance: true,
48 performance_warnings: true,
49 security_checks: true,
50 max_complexity: 1000,
51 max_triple_patterns: 100,
52 max_path_depth: 10,
53 warn_cartesian_products: true,
54 check_type_consistency: true,
55 }
56 }
57}
58
59#[derive(Debug, Clone)]
61pub struct ValidationResult {
62 pub errors: Vec<ValidationError>,
64 pub warnings: Vec<ValidationWarning>,
66 pub complexity_score: usize,
68 pub is_valid: bool,
70}
71
72#[derive(Debug, Clone)]
74pub struct ValidationWarning {
75 pub warning_type: ValidationWarningType,
77 pub message: String,
79 pub location: String,
81 pub suggestion: Option<String>,
83 pub severity: u8,
85}
86
/// Category of a [`ValidationWarning`].
///
/// `Hash` is derived alongside `Eq` so that warning types can be used as
/// `HashSet` members or `HashMap` keys, e.g. to count warnings per category.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ValidationWarningType {
    /// General performance concern; emitted when the triple pattern count
    /// exceeds the configured maximum.
    Performance,
    /// Best-practice recommendation (not emitted by the checks in this file).
    BestPractice,
    /// A join whose two sides share no variables.
    CartesianProduct,
    /// Use of a deprecated feature (not emitted by the checks in this file).
    Deprecated,
    /// Type inconsistency (not emitted by the checks in this file).
    TypeInconsistency,
    /// The query has no result-size limit.
    UnboundedQuery,
    /// A filter expression whose complexity exceeds the built-in threshold.
    ComplexFilter,
    /// Missing index hint (not emitted by the checks in this file).
    MissingIndexHint,
}
107
/// Running totals kept by a validator across `validate` calls.
///
/// `PartialEq`/`Eq` are derived so snapshots can be compared directly, for
/// example against `ValidationStatistics::default()` after a reset.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ValidationStatistics {
    /// Number of validation runs that completed.
    pub total_validated: usize,
    /// Sum of the error counts of all completed runs.
    pub total_errors: usize,
    /// Sum of the warning counts of all completed runs.
    pub total_warnings: usize,
}
118
119impl QueryValidator {
120 pub fn new() -> Self {
122 Self::with_config(ValidationConfig::default())
123 }
124
125 pub fn with_config(config: ValidationConfig) -> Self {
127 Self {
128 config,
129 stats: ValidationStatistics::default(),
130 }
131 }
132
133 pub fn validate(&mut self, algebra: &Algebra) -> Result<ValidationResult> {
135 let mut errors = Vec::new();
136 let mut warnings = Vec::new();
137
138 self.validate_variable_bindings(algebra, &mut errors)?;
140
141 self.validate_aggregates(algebra, &mut errors)?;
143
144 if self.config.warn_cartesian_products {
146 Self::check_cartesian_products(algebra, &mut warnings)?;
147 }
148
149 Self::validate_filters(algebra, &mut errors, &mut warnings)?;
151
152 let complexity_score = Self::calculate_complexity(algebra)?;
154 if complexity_score > self.config.max_complexity {
155 errors.push(ValidationError {
156 error_type: ValidationErrorType::SemanticInconsistency,
157 message: format!(
158 "Query complexity ({}) exceeds maximum allowed ({})",
159 complexity_score, self.config.max_complexity
160 ),
161 location: "Overall query".to_string(),
162 suggestion: Some("Consider breaking the query into smaller parts".to_string()),
163 });
164 }
165
166 if self.config.performance_warnings {
168 self.check_performance_issues(algebra, &mut warnings)?;
169 }
170
171 if self.config.security_checks {
173 self.check_security_issues(algebra, &mut warnings)?;
174 }
175
176 if self.config.check_type_consistency {
178 self.check_type_consistency(algebra, &mut warnings)?;
179 }
180
181 self.stats.total_validated += 1;
183 self.stats.total_errors += errors.len();
184 self.stats.total_warnings += warnings.len();
185
186 let is_valid = errors.is_empty();
187
188 Ok(ValidationResult {
189 errors,
190 warnings,
191 complexity_score,
192 is_valid,
193 })
194 }
195
196 fn validate_variable_bindings(
198 &self,
199 algebra: &Algebra,
200 errors: &mut Vec<ValidationError>,
201 ) -> Result<()> {
202 match algebra {
203 Algebra::Project { variables, pattern } => {
204 let bound_vars = Self::collect_bound_variables(pattern)?;
205
206 for var in variables {
207 if !bound_vars.contains(var) {
208 errors.push(ValidationError {
209 error_type: ValidationErrorType::UnboundVariable,
210 message: format!(
211 "Variable ?{} in SELECT clause is not bound in the query pattern",
212 var.as_str()
213 ),
214 location: "SELECT clause".to_string(),
215 suggestion: Some(format!(
216 "Add a triple pattern that binds ?{}",
217 var.as_str()
218 )),
219 });
220 }
221 }
222 }
223 Algebra::Group {
224 pattern,
225 variables,
226 aggregates,
227 } => {
228 let bound_vars = Self::collect_bound_variables(pattern)?;
229
230 for group_cond in variables {
232 Self::validate_expression_variables(&group_cond.expr, &bound_vars, errors)?;
234 }
235
236 for (_, agg) in aggregates {
238 self.validate_aggregate_expression(agg, &bound_vars, errors)?;
239 }
240 }
241 Algebra::OrderBy { pattern, .. } => {
242 self.validate_variable_bindings(pattern, errors)?;
243 }
244 Algebra::Join { left, right } | Algebra::LeftJoin { left, right, .. } => {
245 self.validate_variable_bindings(left, errors)?;
246 self.validate_variable_bindings(right, errors)?;
247 }
248 Algebra::Union { left, right } => {
249 self.validate_variable_bindings(left, errors)?;
250 self.validate_variable_bindings(right, errors)?;
251 }
252 Algebra::Filter { pattern, .. } => {
253 self.validate_variable_bindings(pattern, errors)?;
254 }
255 _ => {}
256 }
257
258 Ok(())
259 }
260
261 fn collect_bound_variables(algebra: &Algebra) -> Result<HashSet<Variable>> {
263 let mut vars = HashSet::new();
264
265 match algebra {
266 Algebra::Bgp(patterns) => {
267 for pattern in patterns {
268 if let Term::Variable(v) = &pattern.subject {
269 vars.insert(v.clone());
270 }
271 if let Term::Variable(v) = &pattern.predicate {
272 vars.insert(v.clone());
273 }
274 if let Term::Variable(v) = &pattern.object {
275 vars.insert(v.clone());
276 }
277 }
278 }
279 Algebra::Join { left, right } => {
280 vars.extend(Self::collect_bound_variables(left)?);
281 vars.extend(Self::collect_bound_variables(right)?);
282 }
283 Algebra::LeftJoin { left, right, .. } => {
284 vars.extend(Self::collect_bound_variables(left)?);
285 vars.extend(Self::collect_bound_variables(right)?);
286 }
287 Algebra::Union { left, right } => {
288 vars.extend(Self::collect_bound_variables(left)?);
289 vars.extend(Self::collect_bound_variables(right)?);
290 }
291 Algebra::Filter { pattern, .. } => {
292 vars.extend(Self::collect_bound_variables(pattern)?);
293 }
294 Algebra::Project { pattern, .. } => {
295 vars.extend(Self::collect_bound_variables(pattern)?);
296 }
297 Algebra::Group { pattern, .. } => {
298 vars.extend(Self::collect_bound_variables(pattern)?);
299 }
300 _ => {}
301 }
302
303 Ok(vars)
304 }
305
306 fn validate_aggregates(
308 &self,
309 algebra: &Algebra,
310 errors: &mut Vec<ValidationError>,
311 ) -> Result<()> {
312 if let Algebra::Group {
313 variables: by,
314 aggregates,
315 pattern,
316 } = algebra
317 {
318 let bound_vars = Self::collect_bound_variables(pattern)?;
319
320 for (result_var, agg) in aggregates {
321 match agg {
323 Aggregate::Count { expr, .. } => {
324 if let Some(expr) = expr {
325 Self::validate_expression_variables(expr, &bound_vars, errors)?;
326 }
327 }
328 Aggregate::Sum { expr, .. }
329 | Aggregate::Avg { expr, .. }
330 | Aggregate::Min { expr, .. }
331 | Aggregate::Max { expr, .. } => {
332 Self::validate_expression_variables(expr, &bound_vars, errors)?;
333 }
334 Aggregate::GroupConcat { expr, .. } => {
335 Self::validate_expression_variables(expr, &bound_vars, errors)?;
336 }
337 Aggregate::Sample { expr, .. } => {
338 Self::validate_expression_variables(expr, &bound_vars, errors)?;
339 }
340 }
341
342 for group_cond in by {
344 if let Some(alias) = &group_cond.alias {
345 if alias == result_var {
346 errors.push(ValidationError {
347 error_type: ValidationErrorType::InvalidAggregate,
348 message: format!(
349 "Aggregate result variable ?{} conflicts with GROUP BY alias",
350 result_var.as_str()
351 ),
352 location: "GROUP BY clause".to_string(),
353 suggestion: Some(
354 "Use a different variable name for the aggregate result"
355 .to_string(),
356 ),
357 });
358 }
359 }
360 }
361 }
362 }
363
364 Ok(())
365 }
366
367 fn validate_aggregate_expression(
369 &self,
370 agg: &Aggregate,
371 bound_vars: &HashSet<Variable>,
372 errors: &mut Vec<ValidationError>,
373 ) -> Result<()> {
374 match agg {
375 Aggregate::Count { expr, .. } => {
376 if let Some(expr) = expr {
377 Self::validate_expression_variables(expr, bound_vars, errors)?;
378 }
379 }
380 Aggregate::Sum { expr, .. }
381 | Aggregate::Avg { expr, .. }
382 | Aggregate::Min { expr, .. }
383 | Aggregate::Max { expr, .. } => {
384 Self::validate_expression_variables(expr, bound_vars, errors)?;
385 }
386 Aggregate::GroupConcat { expr, .. } => {
387 Self::validate_expression_variables(expr, bound_vars, errors)?;
388 }
389 Aggregate::Sample { expr, .. } => {
390 Self::validate_expression_variables(expr, bound_vars, errors)?;
391 }
392 }
393 Ok(())
394 }
395
396 fn validate_expression_variables(
398 expr: &Expression,
399 bound_vars: &HashSet<Variable>,
400 errors: &mut Vec<ValidationError>,
401 ) -> Result<()> {
402 match expr {
403 Expression::Variable(v) => {
404 if !bound_vars.contains(v) {
405 errors.push(ValidationError {
406 error_type: ValidationErrorType::UnboundVariable,
407 message: format!(
408 "Variable ?{} used in expression is not bound",
409 v.as_str()
410 ),
411 location: "Expression".to_string(),
412 suggestion: Some(format!("Bind ?{} in a triple pattern", v.as_str())),
413 });
414 }
415 }
416 Expression::Binary { left, right, op: _ } => {
417 Self::validate_expression_variables(left, bound_vars, errors)?;
418 Self::validate_expression_variables(right, bound_vars, errors)?;
419 }
420 Expression::Unary { operand, op: _ } => {
421 Self::validate_expression_variables(operand, bound_vars, errors)?;
422 }
423 Expression::Function { args, .. } => {
424 for arg in args {
425 Self::validate_expression_variables(arg, bound_vars, errors)?;
426 }
427 }
428 _ => {}
429 }
430 Ok(())
431 }
432
433 fn check_cartesian_products(
435 algebra: &Algebra,
436 warnings: &mut Vec<ValidationWarning>,
437 ) -> Result<()> {
438 if let Algebra::Join { left, right } = algebra {
439 let left_vars = Self::collect_bound_variables(left)?;
440 let right_vars = Self::collect_bound_variables(right)?;
441
442 let shared_vars: HashSet<_> = left_vars.intersection(&right_vars).collect();
444
445 if shared_vars.is_empty() {
446 warnings.push(ValidationWarning {
447 warning_type: ValidationWarningType::CartesianProduct,
448 message: "Detected potential cartesian product: join without shared variables".to_string(),
449 location: "JOIN".to_string(),
450 suggestion: Some("Ensure the join patterns share at least one variable to avoid expensive cartesian products".to_string()),
451 severity: 8,
452 });
453 }
454
455 Self::check_cartesian_products(left, warnings)?;
457 Self::check_cartesian_products(right, warnings)?;
458 }
459
460 Ok(())
461 }
462
463 fn validate_filters(
465 algebra: &Algebra,
466 errors: &mut Vec<ValidationError>,
467 warnings: &mut Vec<ValidationWarning>,
468 ) -> Result<()> {
469 if let Algebra::Filter { pattern, condition } = algebra {
470 let bound_vars = Self::collect_bound_variables(pattern)?;
471
472 Self::validate_expression_variables(condition, &bound_vars, errors)?;
474
475 let complexity = Self::calculate_expression_complexity(condition);
477 if complexity > 10 {
478 warnings.push(ValidationWarning {
479 warning_type: ValidationWarningType::ComplexFilter,
480 message: format!("Filter expression has high complexity ({})", complexity),
481 location: "FILTER clause".to_string(),
482 suggestion: Some(
483 "Consider simplifying the filter or breaking it into multiple filters"
484 .to_string(),
485 ),
486 severity: 5,
487 });
488 }
489
490 Self::validate_filters(pattern, errors, warnings)?;
492 }
493
494 Ok(())
495 }
496
497 pub fn calculate_complexity(algebra: &Algebra) -> Result<usize> {
499 let mut complexity = 0;
500
501 match algebra {
502 Algebra::Bgp(patterns) => {
503 complexity += patterns.len();
504 }
505 Algebra::Join { left, right } => {
506 complexity += 2; complexity += Self::calculate_complexity(left)?;
508 complexity += Self::calculate_complexity(right)?;
509 }
510 Algebra::LeftJoin { left, right, .. } => {
511 complexity += 3; complexity += Self::calculate_complexity(left)?;
513 complexity += Self::calculate_complexity(right)?;
514 }
515 Algebra::Union { left, right } => {
516 complexity += 2;
517 complexity += Self::calculate_complexity(left)?;
518 complexity += Self::calculate_complexity(right)?;
519 }
520 Algebra::Filter {
521 pattern,
522 condition: expr,
523 } => {
524 complexity += 1;
525 complexity += Self::calculate_expression_complexity(expr);
526 complexity += Self::calculate_complexity(pattern)?;
527 }
528 Algebra::Group {
529 pattern,
530 variables: by,
531 aggregates,
532 } => {
533 complexity += 5; complexity += by.len();
535 complexity += aggregates.len() * 2;
536 complexity += Self::calculate_complexity(pattern)?;
537 }
538 Algebra::OrderBy { pattern, .. } => {
539 complexity += 3; complexity += Self::calculate_complexity(pattern)?;
541 }
542 _ => {
543 complexity += 1;
544 }
545 }
546
547 Ok(complexity)
548 }
549
550 fn calculate_expression_complexity(expr: &Expression) -> usize {
552 match expr {
553 Expression::Binary { left, right, .. } => {
554 1 + Self::calculate_expression_complexity(left)
555 + Self::calculate_expression_complexity(right)
556 }
557 Expression::Unary { operand, .. } => 1 + Self::calculate_expression_complexity(operand),
558 Expression::Function { args, .. } => {
559 2 + args
560 .iter()
561 .map(Self::calculate_expression_complexity)
562 .sum::<usize>()
563 }
564 _ => 1,
565 }
566 }
567
568 fn check_performance_issues(
570 &self,
571 algebra: &Algebra,
572 warnings: &mut Vec<ValidationWarning>,
573 ) -> Result<()> {
574 if Self::is_unbounded(algebra)? {
576 warnings.push(ValidationWarning {
577 warning_type: ValidationWarningType::UnboundedQuery,
578 message: "Query has no LIMIT clause and may return very large result sets"
579 .to_string(),
580 location: "Overall query".to_string(),
581 suggestion: Some(
582 "Add a LIMIT clause to prevent excessive memory usage".to_string(),
583 ),
584 severity: 6,
585 });
586 }
587
588 let pattern_count = Self::count_triple_patterns(algebra)?;
590 if pattern_count > self.config.max_triple_patterns {
591 warnings.push(ValidationWarning {
592 warning_type: ValidationWarningType::Performance,
593 message: format!(
594 "Query has {} triple patterns (max recommended: {})",
595 pattern_count, self.config.max_triple_patterns
596 ),
597 location: "WHERE clause".to_string(),
598 suggestion: Some("Consider breaking the query into smaller subqueries".to_string()),
599 severity: 7,
600 });
601 }
602
603 Ok(())
604 }
605
606 fn is_unbounded(algebra: &Algebra) -> Result<bool> {
608 match algebra {
609 Algebra::Slice { .. } => Ok(false),
610 Algebra::Project { pattern, .. } => Self::is_unbounded(pattern),
611 Algebra::OrderBy { pattern, .. } => Self::is_unbounded(pattern),
612 Algebra::Group { pattern, .. } => Self::is_unbounded(pattern),
613 Algebra::Filter { pattern, .. } => Self::is_unbounded(pattern),
614 _ => Ok(true),
615 }
616 }
617
618 fn count_triple_patterns(algebra: &Algebra) -> Result<usize> {
620 match algebra {
621 Algebra::Bgp(patterns) => Ok(patterns.len()),
622 Algebra::Join { left, right }
623 | Algebra::LeftJoin { left, right, .. }
624 | Algebra::Union { left, right } => {
625 Ok(Self::count_triple_patterns(left)? + Self::count_triple_patterns(right)?)
626 }
627 Algebra::Filter { pattern, .. } => Self::count_triple_patterns(pattern),
628 Algebra::Project { pattern, .. } => Self::count_triple_patterns(pattern),
629 _ => Ok(0),
630 }
631 }
632
633 fn check_security_issues(
635 &self,
636 _algebra: &Algebra,
637 _warnings: &mut Vec<ValidationWarning>,
638 ) -> Result<()> {
639 Ok(())
645 }
646
647 fn check_type_consistency(
649 &self,
650 _algebra: &Algebra,
651 _warnings: &mut Vec<ValidationWarning>,
652 ) -> Result<()> {
653 Ok(())
659 }
660
661 pub fn statistics(&self) -> &ValidationStatistics {
663 &self.stats
664 }
665
666 pub fn reset_statistics(&mut self) {
668 self.stats = ValidationStatistics::default();
669 }
670}
671
672impl Default for QueryValidator {
673 fn default() -> Self {
674 Self::new()
675 }
676}
677
#[cfg(test)]
mod tests {
    use super::*;
    use crate::algebra::{Literal, Term, TriplePattern};
    use oxirs_core::model::NamedNode;

    /// Builds an IRI term; inputs are test constants, so failure is a bug.
    fn create_term(s: &str) -> Term {
        Term::Iri(NamedNode::new(s).expect("test IRI must be valid"))
    }

    /// Builds a variable; inputs are test constants, so failure is a bug.
    fn create_var(name: &str) -> Variable {
        Variable::new(name).expect("test variable name must be valid")
    }

    /// Builds a variable term.
    fn var_term(name: &str) -> Term {
        Term::Variable(create_var(name))
    }

    /// Builds a triple pattern with an IRI predicate.
    fn triple(subject: Term, predicate: &str, object: Term) -> TriplePattern {
        TriplePattern {
            subject,
            predicate: create_term(predicate),
            object,
        }
    }

    /// `{ ?s <http://example.org/p> ?o }`
    fn single_pattern_bgp() -> Algebra {
        Algebra::Bgp(vec![triple(
            var_term("s"),
            "http://example.org/p",
            var_term("o"),
        )])
    }

    /// A join of two basic graph patterns that share no variable.
    fn disjoint_join() -> Algebra {
        let left = Algebra::Bgp(vec![triple(
            var_term("s1"),
            "http://example.org/p1",
            var_term("o1"),
        )]);
        let right = Algebra::Bgp(vec![triple(
            var_term("s2"),
            "http://example.org/p2",
            var_term("o2"),
        )]);

        Algebra::Join {
            left: Box::new(left),
            right: Box::new(right),
        }
    }

    /// Returns whether `result` contains a warning of the given type.
    fn has_warning(result: &ValidationResult, expected: ValidationWarningType) -> bool {
        result.warnings.iter().any(|w| w.warning_type == expected)
    }

    #[test]
    fn test_validator_creation() {
        let validator = QueryValidator::new();
        assert!(validator.config.strict_compliance);
    }

    #[test]
    fn test_unbound_variable_detection() {
        let mut validator = QueryValidator::new();

        let query = Algebra::Project {
            variables: vec![create_var("s"), create_var("unbound")],
            pattern: Box::new(single_pattern_bgp()),
        };

        let result = validator.validate(&query).unwrap();
        assert!(!result.is_valid);
        assert_eq!(result.errors.len(), 1);
        assert_eq!(
            result.errors[0].error_type,
            ValidationErrorType::UnboundVariable
        );
    }

    #[test]
    fn test_valid_query() {
        let mut validator = QueryValidator::new();

        let query = Algebra::Project {
            variables: vec![create_var("s"), create_var("o")],
            pattern: Box::new(single_pattern_bgp()),
        };

        let result = validator.validate(&query).unwrap();
        assert!(result.is_valid);
        assert!(result.errors.is_empty());
    }

    #[test]
    fn test_cartesian_product_warning() {
        let mut validator = QueryValidator::new();

        let result = validator.validate(&disjoint_join()).unwrap();

        // A cartesian product is only a warning; the query stays valid.
        assert!(result.is_valid);
        assert!(has_warning(
            &result,
            ValidationWarningType::CartesianProduct
        ));
    }

    #[test]
    fn test_cartesian_product_warning_can_be_disabled() {
        let mut validator = QueryValidator::with_config(ValidationConfig {
            warn_cartesian_products: false,
            ..Default::default()
        });

        let result = validator.validate(&disjoint_join()).unwrap();

        assert!(result.is_valid);
        assert!(!has_warning(
            &result,
            ValidationWarningType::CartesianProduct
        ));
    }

    #[test]
    fn test_complexity_calculation() {
        let simple = Algebra::Bgp(vec![
            triple(var_term("s"), "http://example.org/p", var_term("o")),
            triple(
                var_term("o"),
                "http://example.org/p2",
                Term::Literal(Literal::string("test")),
            ),
        ]);

        let complexity = QueryValidator::calculate_complexity(&simple).unwrap();
        assert_eq!(complexity, 2);
    }

    #[test]
    fn test_validation_statistics() {
        let mut validator = QueryValidator::new();

        let query = Algebra::Project {
            variables: vec![create_var("s")],
            pattern: Box::new(single_pattern_bgp()),
        };

        let result = validator.validate(&query).unwrap();

        let stats = validator.statistics();
        assert_eq!(stats.total_validated, 1);
        assert_eq!(stats.total_errors, result.errors.len());
        assert_eq!(stats.total_warnings, result.warnings.len());

        validator.reset_statistics();
        let stats = validator.statistics();
        assert_eq!(stats.total_validated, 0);
        assert_eq!(stats.total_errors, 0);
        assert_eq!(stats.total_warnings, 0);
    }

    #[test]
    fn test_custom_config() {
        let config = ValidationConfig {
            max_complexity: 50,
            warn_cartesian_products: false,
            ..Default::default()
        };

        let validator = QueryValidator::with_config(config);
        assert_eq!(validator.config.max_complexity, 50);
        assert!(!validator.config.warn_cartesian_products);
    }

    #[test]
    fn test_aggregate_validation() {
        let mut validator = QueryValidator::new();

        let query = Algebra::Group {
            pattern: Box::new(single_pattern_bgp()),
            variables: vec![],
            aggregates: vec![(
                create_var("count"),
                Aggregate::Count {
                    distinct: false,
                    expr: None,
                },
            )],
        };

        let result = validator.validate(&query).unwrap();
        assert!(result.is_valid);
    }

    #[test]
    fn test_performance_warnings() {
        let mut validator = QueryValidator::with_config(ValidationConfig {
            max_triple_patterns: 2,
            ..Default::default()
        });

        // Three patterns against a configured maximum of two.
        let pattern = Algebra::Bgp(vec![
            triple(var_term("s"), "http://example.org/p1", var_term("o")),
            triple(var_term("o"), "http://example.org/p2", var_term("o2")),
            triple(
                var_term("o2"),
                "http://example.org/p3",
                Term::Literal(Literal::string("test")),
            ),
        ]);

        let result = validator.validate(&pattern).unwrap();
        assert!(result.is_valid);
        // Check for the pattern-count warning specifically: the query also
        // triggers the unrelated unbounded-query warning, so a bare
        // "warnings is not empty" assertion would pass either way.
        assert!(has_warning(&result, ValidationWarningType::Performance));
    }
}