// oxirs_shacl/optimization/core.rs

1//! SHACL Constraint Evaluation Optimizations
2//!
3//! This module provides performance optimizations for SHACL constraint evaluation
4//! including result caching, batch validation, and dependency-aware evaluation ordering.
5
6#![allow(dead_code)]
7
8use crate::{
9    constraints::{Constraint, ConstraintContext, ConstraintEvaluationResult},
10    PropertyPath, Result, ShaclError, ShapeId,
11};
12use oxirs_core::{model::Term, RdfTerm, Store};
13use serde::{Deserialize, Serialize};
14use std::collections::HashMap;
15use std::hash::{Hash, Hasher};
16use std::sync::{Arc, RwLock};
17use std::time::{Duration, Instant};
18
/// Constraint evaluation cache for performance optimization.
///
/// Cloning is cheap and intentional: the entry map and the statistics are
/// both behind `Arc`, so every clone observes and updates the same shared
/// cache state.
#[derive(Debug, Clone)]
pub struct ConstraintCache {
    /// Cached constraint evaluation results, shared across clones.
    cache: Arc<RwLock<HashMap<CacheKey, CachedResult>>>,
    /// Running hit/miss/eviction/timing statistics, shared across clones.
    stats: Arc<RwLock<CacheStats>>,
    /// Maximum number of entries before eviction is triggered on insert.
    max_size: usize,
    /// Time-to-live for each cache entry, measured from insertion.
    ttl: Duration,
}
31
/// Cache key for constraint evaluation results.
///
/// Identity is (constraint, focus node, path, values, shape): two lookups
/// collide only when all five components match. Constraints and value lists
/// are folded into `u64` hashes rather than stored, to keep keys small.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
struct CacheKey {
    /// Hash of the constraint (see `ConstraintCache::hash_constraint`).
    constraint_hash: u64,
    /// Focus node being validated.
    focus_node: Term,
    /// Property path, when the constraint applies to one.
    path: Option<PropertyPath>,
    /// Hash of the value list (see `ConstraintCache::hash_values`).
    values_hash: u64,
    /// Shape whose constraint produced this result.
    shape_id: ShapeId,
}
46
/// A constraint evaluation result stored in the cache.
#[derive(Debug, Clone)]
struct CachedResult {
    /// The cached evaluation result.
    result: ConstraintEvaluationResult,
    /// Insertion timestamp; compared against the cache TTL on lookup.
    cached_at: Instant,
    /// Number of times this entry has been served; used by LFU eviction.
    hit_count: usize,
}
57
/// Aggregate cache statistics, updated by `ConstraintCache` operations.
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
    /// Lookups served from the cache.
    pub hits: usize,
    /// Lookups that found no (valid) entry.
    pub misses: usize,
    /// Total constraint evaluations recorded via `put`.
    pub evaluations: usize,
    /// Running average evaluation time in microseconds.
    pub avg_evaluation_time_us: f64,
    /// Entries evicted due to capacity pressure.
    pub evictions: usize,
}
72
73impl CacheStats {
74    pub fn hit_rate(&self) -> f64 {
75        if self.hits + self.misses == 0 {
76            0.0
77        } else {
78            self.hits as f64 / (self.hits + self.misses) as f64
79        }
80    }
81}
82
83impl Default for ConstraintCache {
84    fn default() -> Self {
85        Self::new(10000, Duration::from_secs(300)) // 10k entries, 5 min TTL
86    }
87}
88
89impl ConstraintCache {
90    /// Create a new constraint cache
91    pub fn new(max_size: usize, ttl: Duration) -> Self {
92        Self {
93            cache: Arc::new(RwLock::new(HashMap::new())),
94            stats: Arc::new(RwLock::new(CacheStats::default())),
95            max_size,
96            ttl,
97        }
98    }
99
100    /// Get a cached result if available
101    pub fn get(
102        &self,
103        constraint: &Constraint,
104        context: &ConstraintContext,
105    ) -> Option<ConstraintEvaluationResult> {
106        let key = self.create_cache_key(constraint, context);
107
108        // First, check if we have a valid cached result
109        let result = {
110            let cache = self
111                .cache
112                .read()
113                .expect("cache lock should not be poisoned");
114            if let Some(cached) = cache.get(&key) {
115                // Check if entry is still valid
116                if cached.cached_at.elapsed() <= self.ttl {
117                    Some(cached.result.clone())
118                } else {
119                    None
120                }
121            } else {
122                None
123            }
124        };
125
126        if result.is_some() {
127            // Update statistics and hit count
128            {
129                let mut stats = self
130                    .stats
131                    .write()
132                    .expect("stats lock should not be poisoned");
133                stats.hits += 1;
134            }
135
136            // Increment hit count
137            {
138                let mut cache_mut = self
139                    .cache
140                    .write()
141                    .expect("cache lock should not be poisoned");
142                if let Some(entry) = cache_mut.get_mut(&key) {
143                    entry.hit_count += 1;
144                }
145            }
146
147            return result;
148        }
149
150        // Cache miss
151        let mut stats = self
152            .stats
153            .write()
154            .expect("stats lock should not be poisoned");
155        stats.misses += 1;
156        None
157    }
158
159    /// Store a result in the cache
160    pub fn put(
161        &self,
162        constraint: &Constraint,
163        context: &ConstraintContext,
164        result: ConstraintEvaluationResult,
165        evaluation_time: Duration,
166    ) {
167        let key = self.create_cache_key(constraint, context);
168        let cached_result = CachedResult {
169            result: result.clone(),
170            cached_at: Instant::now(),
171            hit_count: 0,
172        };
173
174        {
175            let mut cache = self
176                .cache
177                .write()
178                .expect("cache lock should not be poisoned");
179
180            // Evict entries if cache is full
181            if cache.len() >= self.max_size {
182                self.evict_entries(&mut cache);
183            }
184
185            cache.insert(key, cached_result);
186        }
187
188        // Update statistics
189        {
190            let mut stats = self
191                .stats
192                .write()
193                .expect("stats lock should not be poisoned");
194            stats.evaluations += 1;
195            let eval_time_us = evaluation_time.as_micros() as f64;
196            if stats.evaluations == 1 {
197                stats.avg_evaluation_time_us = eval_time_us;
198            } else {
199                stats.avg_evaluation_time_us =
200                    (stats.avg_evaluation_time_us * (stats.evaluations - 1) as f64 + eval_time_us)
201                        / stats.evaluations as f64;
202            }
203        }
204    }
205
206    /// Create cache key from constraint and context
207    fn create_cache_key(&self, constraint: &Constraint, context: &ConstraintContext) -> CacheKey {
208        let constraint_hash = self.hash_constraint(constraint);
209        let values_hash = self.hash_values(&context.values);
210
211        CacheKey {
212            constraint_hash,
213            focus_node: context.focus_node.clone(),
214            path: context.path.clone(),
215            values_hash,
216            shape_id: context.shape_id.clone(),
217        }
218    }
219
220    /// Hash a constraint for caching
221    fn hash_constraint(&self, constraint: &Constraint) -> u64 {
222        use std::collections::hash_map::DefaultHasher;
223        let mut hasher = DefaultHasher::new();
224
225        // Hash based on constraint type and key properties
226        match constraint {
227            Constraint::Class(c) => {
228                "class".hash(&mut hasher);
229                c.class_iri.as_str().hash(&mut hasher);
230            }
231            Constraint::Datatype(c) => {
232                "datatype".hash(&mut hasher);
233                c.datatype_iri.as_str().hash(&mut hasher);
234            }
235            Constraint::MinCount(c) => {
236                "minCount".hash(&mut hasher);
237                c.min_count.hash(&mut hasher);
238            }
239            Constraint::MaxCount(c) => {
240                "maxCount".hash(&mut hasher);
241                c.max_count.hash(&mut hasher);
242            }
243            Constraint::Pattern(c) => {
244                "pattern".hash(&mut hasher);
245                c.pattern.hash(&mut hasher);
246                c.flags.hash(&mut hasher);
247            }
248            // Add more constraint types as needed
249            _ => {
250                format!("{constraint:?}").hash(&mut hasher);
251            }
252        }
253
254        hasher.finish()
255    }
256
257    /// Hash values for caching
258    fn hash_values(&self, values: &[Term]) -> u64 {
259        use std::collections::hash_map::DefaultHasher;
260        let mut hasher = DefaultHasher::new();
261        for value in values {
262            format!("{value:?}").hash(&mut hasher);
263        }
264        hasher.finish()
265    }
266
267    /// Evict cache entries using LFU (Least Frequently Used) strategy
268    fn evict_entries(&self, cache: &mut HashMap<CacheKey, CachedResult>) {
269        let evict_count = cache.len() / 4; // Evict 25% of entries
270
271        // Sort by hit count and age
272        let mut entries: Vec<_> = cache.iter().map(|(k, v)| (k.clone(), v.clone())).collect();
273        entries.sort_by(|a, b| {
274            let hit_cmp = a.1.hit_count.cmp(&b.1.hit_count);
275            if hit_cmp == std::cmp::Ordering::Equal {
276                // If hit counts are equal, evict older entries
277                b.1.cached_at.cmp(&a.1.cached_at)
278            } else {
279                hit_cmp
280            }
281        });
282
283        // Collect keys to remove
284        let keys_to_remove: Vec<_> = entries
285            .iter()
286            .take(evict_count)
287            .map(|(k, _)| k.clone())
288            .collect();
289
290        // Remove least frequently used entries
291        for key in keys_to_remove {
292            cache.remove(&key);
293        }
294
295        // Update eviction statistics
296        let mut stats = self
297            .stats
298            .write()
299            .expect("stats lock should not be poisoned");
300        stats.evictions += evict_count;
301    }
302
303    /// Get cache statistics
304    pub fn stats(&self) -> CacheStats {
305        self.stats
306            .read()
307            .expect("stats lock should not be poisoned")
308            .clone()
309    }
310
311    /// Clear the cache
312    pub fn clear(&self) {
313        self.cache
314            .write()
315            .expect("cache lock should not be poisoned")
316            .clear();
317    }
318}
319
/// Batch constraint evaluator for efficient evaluation of multiple constraints.
#[derive(Debug)]
pub struct BatchConstraintEvaluator {
    /// Shared result cache consulted before every evaluation.
    cache: ConstraintCache,
    /// When true, batches take the parallel code path (currently a
    /// sequential fallback — see `evaluate_batch_parallel`).
    parallel_evaluation: bool,
    /// Number of constraint/context pairs processed per chunk.
    batch_size: usize,
}
327
328impl Default for BatchConstraintEvaluator {
329    fn default() -> Self {
330        Self::new(ConstraintCache::default(), false, 100)
331    }
332}
333
334impl BatchConstraintEvaluator {
335    /// Create a new batch evaluator
336    pub fn new(cache: ConstraintCache, parallel_evaluation: bool, batch_size: usize) -> Self {
337        Self {
338            cache,
339            parallel_evaluation,
340            batch_size,
341        }
342    }
343
344    /// Evaluate multiple constraints in batches
345    pub fn evaluate_batch(
346        &self,
347        store: &dyn Store,
348        constraints_with_contexts: Vec<(Constraint, ConstraintContext)>,
349    ) -> Result<Vec<ConstraintEvaluationResult>> {
350        let mut results = Vec::with_capacity(constraints_with_contexts.len());
351
352        // Process in batches
353        for batch in constraints_with_contexts.chunks(self.batch_size) {
354            let batch_results = if self.parallel_evaluation {
355                self.evaluate_batch_parallel(store, batch)?
356            } else {
357                self.evaluate_batch_sequential(store, batch)?
358            };
359            results.extend(batch_results);
360        }
361
362        Ok(results)
363    }
364
365    /// Evaluate constraints sequentially with caching
366    fn evaluate_batch_sequential(
367        &self,
368        store: &dyn Store,
369        batch: &[(Constraint, ConstraintContext)],
370    ) -> Result<Vec<ConstraintEvaluationResult>> {
371        let mut results = Vec::new();
372
373        for (constraint, context) in batch {
374            // Try cache first
375            if let Some(cached_result) = self.cache.get(constraint, context) {
376                results.push(cached_result);
377                continue;
378            }
379
380            // Evaluate and cache result
381            let start_time = Instant::now();
382            let result = constraint.evaluate(store, context)?;
383            let evaluation_time = start_time.elapsed();
384
385            self.cache
386                .put(constraint, context, result.clone(), evaluation_time);
387            results.push(result);
388        }
389
390        Ok(results)
391    }
392
393    /// Evaluate constraints in parallel (when safe to do so)
394    fn evaluate_batch_parallel(
395        &self,
396        store: &dyn Store,
397        batch: &[(Constraint, ConstraintContext)],
398    ) -> Result<Vec<ConstraintEvaluationResult>> {
399        // For parallel evaluation, we need to be careful about thread safety
400        // We'll use a thread pool for CPU-bound constraint evaluation
401
402        if batch.len() < 4 {
403            // For small batches, sequential is faster
404            return self.evaluate_batch_sequential(store, batch);
405        }
406
407        // For now, parallel evaluation has thread safety limitations
408        // Fall back to sequential evaluation to avoid Rc<> threading issues
409        self.evaluate_batch_sequential(store, batch)
410    }
411
412    /// Get cache statistics
413    pub fn cache_stats(&self) -> CacheStats {
414        self.cache.stats()
415    }
416}
417
/// Constraint dependency analyzer for optimal evaluation ordering.
#[derive(Debug)]
pub struct ConstraintDependencyAnalyzer {
    /// Relative evaluation-cost estimates keyed by constraint type name
    /// (e.g. "class", "sparql"); refined at runtime via
    /// `update_cost_estimate`.
    cost_estimates: HashMap<String, f64>,
}
424
425impl Default for ConstraintDependencyAnalyzer {
426    fn default() -> Self {
427        let mut cost_estimates = HashMap::new();
428
429        // Cost estimates for different constraint types (relative)
430        cost_estimates.insert("class".to_string(), 5.0);
431        cost_estimates.insert("datatype".to_string(), 1.0);
432        cost_estimates.insert("nodeKind".to_string(), 1.0);
433        cost_estimates.insert("minCount".to_string(), 1.0);
434        cost_estimates.insert("maxCount".to_string(), 1.0);
435        cost_estimates.insert("pattern".to_string(), 3.0);
436        cost_estimates.insert("sparql".to_string(), 10.0);
437        cost_estimates.insert("qualifiedValueShape".to_string(), 8.0);
438        cost_estimates.insert("closed".to_string(), 6.0);
439
440        Self { cost_estimates }
441    }
442}
443
444impl ConstraintDependencyAnalyzer {
445    /// Analyze and order constraints for optimal evaluation
446    pub fn optimize_constraint_order(&self, constraints: Vec<Constraint>) -> Vec<Constraint> {
447        let mut constraint_info: Vec<_> = constraints
448            .into_iter()
449            .map(|c| {
450                let cost = self.estimate_constraint_cost(&c);
451                let selectivity = self.estimate_constraint_selectivity(&c);
452                // Sort by selectivity (lower is better) then by cost
453                (c, selectivity, cost)
454            })
455            .collect();
456
457        // Sort by selectivity first (more selective constraints first), then by cost
458        constraint_info.sort_by(|a, b| {
459            let selectivity_cmp = a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal);
460            if selectivity_cmp == std::cmp::Ordering::Equal {
461                // If selectivity is equal, prioritize lower cost constraints
462                a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal)
463            } else {
464                selectivity_cmp
465            }
466        });
467
468        constraint_info.into_iter().map(|(c, _, _)| c).collect()
469    }
470
471    /// Estimate the cost of evaluating a constraint
472    pub fn estimate_constraint_cost(&self, constraint: &Constraint) -> f64 {
473        let base_cost = match constraint {
474            Constraint::Class(_) => self.cost_estimates.get("class").copied().unwrap_or(5.0),
475            Constraint::Datatype(_) => self.cost_estimates.get("datatype").copied().unwrap_or(1.0),
476            Constraint::NodeKind(_) => self.cost_estimates.get("nodeKind").copied().unwrap_or(1.0),
477            Constraint::MinCount(_) => self.cost_estimates.get("minCount").copied().unwrap_or(1.0),
478            Constraint::MaxCount(_) => self.cost_estimates.get("maxCount").copied().unwrap_or(1.0),
479            Constraint::Pattern(_) => self.cost_estimates.get("pattern").copied().unwrap_or(3.0),
480            Constraint::Sparql(_) => self.cost_estimates.get("sparql").copied().unwrap_or(10.0),
481            Constraint::QualifiedValueShape(_) => self
482                .cost_estimates
483                .get("qualifiedValueShape")
484                .copied()
485                .unwrap_or(8.0),
486            Constraint::Closed(_) => self.cost_estimates.get("closed").copied().unwrap_or(6.0),
487            Constraint::And(_) | Constraint::Or(_) | Constraint::Xone(_) => {
488                // Logical constraints have variable cost based on sub-constraints
489                7.0
490            }
491            _ => 3.0, // Default cost for other constraints
492        };
493
494        base_cost
495    }
496
497    /// Estimate the selectivity of a constraint (how many results it will filter out)
498    pub fn estimate_constraint_selectivity(&self, constraint: &Constraint) -> f64 {
499        match constraint {
500            // Very selective constraints (eliminate many candidates)
501            Constraint::Class(_) => 0.8,
502            Constraint::Datatype(_) => 0.6,
503            Constraint::NodeKind(_) => 0.3,
504            Constraint::HasValue(_) => 0.05,
505            Constraint::In(_) => 0.15,
506
507            // Moderately selective constraints - MinCount is often very cheap to check
508            Constraint::MinCount(_) | Constraint::MaxCount(_) => 0.1,
509            Constraint::Pattern(_) => 0.5,
510            Constraint::MinLength(_) | Constraint::MaxLength(_) => 0.6,
511
512            // Less selective constraints
513            Constraint::MinInclusive(_) | Constraint::MaxInclusive(_) => 0.7,
514            Constraint::MinExclusive(_) | Constraint::MaxExclusive(_) => 0.7,
515
516            // Variable selectivity (depends on implementation)
517            Constraint::Sparql(_) => 0.8,
518            Constraint::QualifiedValueShape(_) => 0.6,
519            Constraint::Closed(_) => 0.4,
520
521            // Logical constraints depend on sub-constraints
522            Constraint::And(_) => 0.3,  // AND is generally selective
523            Constraint::Or(_) => 0.8,   // OR is generally less selective
524            Constraint::Xone(_) => 0.5, // XOR is moderately selective
525            Constraint::Not(_) => 0.9,  // NOT is generally less selective
526
527            _ => 0.5, // Default moderate selectivity
528        }
529    }
530
531    /// Update cost estimate for a constraint type based on actual performance
532    pub fn update_cost_estimate(&mut self, constraint_type: &str, actual_cost: f64) {
533        // Use exponential moving average to update cost estimates
534        let alpha = 0.1; // Learning rate
535        let current_estimate = self
536            .cost_estimates
537            .get(constraint_type)
538            .copied()
539            .unwrap_or(3.0);
540        let new_estimate = alpha * actual_cost + (1.0 - alpha) * current_estimate;
541        self.cost_estimates
542            .insert(constraint_type.to_string(), new_estimate);
543    }
544}
545
/// Advanced validation optimization engine combining caching, constraint
/// reordering, and batch evaluation behind one entry point
/// (`optimize_and_evaluate`).
#[derive(Debug)]
pub struct ValidationOptimizationEngine {
    /// Constraint result cache; shares state with `batch_evaluator`'s cache
    /// (ConstraintCache clones share state via `Arc`).
    cache: ConstraintCache,
    /// Cost/selectivity analyzer used for constraint reordering.
    dependency_analyzer: ConstraintDependencyAnalyzer,
    /// Batch evaluator used when caching is enabled.
    batch_evaluator: BatchConstraintEvaluator,
    /// Aggregated performance metrics.
    metrics: Arc<RwLock<OptimizationMetrics>>,
    /// Active configuration (see `OptimizationConfig`).
    config: OptimizationConfig,
}
560
/// Configuration for `ValidationOptimizationEngine`.
#[derive(Debug, Clone)]
pub struct OptimizationConfig {
    /// Enable constraint result caching.
    pub enable_caching: bool,
    /// Enable parallel evaluation where possible (currently falls back to
    /// sequential evaluation — see `BatchConstraintEvaluator`).
    pub enable_parallel: bool,
    /// Batch size for constraint evaluation.
    pub batch_size: usize,
    /// Enable cost/selectivity-based constraint reordering.
    pub enable_reordering: bool,
    /// Maximum number of cache entries.
    pub max_cache_size: usize,
    /// Cache entry TTL in seconds.
    pub cache_ttl_secs: u64,
}
577
578impl Default for OptimizationConfig {
579    fn default() -> Self {
580        Self {
581            enable_caching: true,
582            enable_parallel: false, // Disabled by default due to thread safety
583            batch_size: 100,
584            enable_reordering: true,
585            max_cache_size: 10000,
586            cache_ttl_secs: 300,
587        }
588    }
589}
590
/// Optimization metrics for performance monitoring.
#[derive(Debug, Clone, Default)]
pub struct OptimizationMetrics {
    /// Total constraint evaluations (cache hits and misses alike).
    pub total_evaluations: usize,
    /// Cache hit rate in `[0.0, 1.0]`, copied from the cache statistics.
    pub cache_hit_rate: f64,
    /// Average evaluation time in microseconds, from the cache statistics.
    pub avg_evaluation_time_us: f64,
    /// Number of constraints that passed through the reordering step.
    pub constraints_reordered: usize,
    /// Rough estimate of total time saved by caching, in microseconds.
    pub optimization_time_saved_us: f64,
    /// Most expensive constraint types observed (type name -> cost).
    pub expensive_constraints: HashMap<String, f64>,
}
607
608impl ValidationOptimizationEngine {
609    /// Create a new optimization engine
610    pub fn new(config: OptimizationConfig) -> Self {
611        let cache = ConstraintCache::new(
612            config.max_cache_size,
613            Duration::from_secs(config.cache_ttl_secs),
614        );
615        let dependency_analyzer = ConstraintDependencyAnalyzer::default();
616        let batch_evaluator =
617            BatchConstraintEvaluator::new(cache.clone(), config.enable_parallel, config.batch_size);
618
619        Self {
620            cache,
621            dependency_analyzer,
622            batch_evaluator,
623            metrics: Arc::new(RwLock::new(OptimizationMetrics::default())),
624            config,
625        }
626    }
627
628    /// Optimize and evaluate a set of constraints
629    pub fn optimize_and_evaluate(
630        &mut self,
631        store: &dyn Store,
632        constraints_with_contexts: Vec<(Constraint, ConstraintContext)>,
633    ) -> Result<Vec<ConstraintEvaluationResult>> {
634        let start_time = Instant::now();
635
636        // Step 1: Reorder constraints for optimal evaluation if enabled
637        let optimized_constraints = if self.config.enable_reordering {
638            self.reorder_constraints_for_optimization(constraints_with_contexts)
639        } else {
640            constraints_with_contexts
641        };
642
643        // Step 2: Evaluate using batch processing
644        let results = if self.config.enable_caching {
645            self.batch_evaluator
646                .evaluate_batch(store, optimized_constraints)?
647        } else {
648            // Direct evaluation without caching
649            self.evaluate_without_cache(store, optimized_constraints)?
650        };
651
652        // Step 3: Update metrics
653        let total_time = start_time.elapsed();
654        self.update_metrics(results.len(), total_time);
655
656        Ok(results)
657    }
658
659    /// Reorder constraints based on cost and selectivity analysis
660    fn reorder_constraints_for_optimization(
661        &mut self,
662        constraints_with_contexts: Vec<(Constraint, ConstraintContext)>,
663    ) -> Vec<(Constraint, ConstraintContext)> {
664        // Group by context to maintain constraint evaluation order within same context
665        let mut context_groups: HashMap<String, Vec<(Constraint, ConstraintContext)>> =
666            HashMap::new();
667
668        for (constraint, context) in constraints_with_contexts {
669            let context_key = format!("{:?}_{:?}", context.focus_node, context.shape_id);
670            context_groups
671                .entry(context_key)
672                .or_default()
673                .push((constraint, context));
674        }
675
676        let mut optimized = Vec::new();
677
678        for (_, mut group) in context_groups {
679            // Sort constraints within each context group
680            group.sort_by(|(a, _), (b, _)| {
681                let cost_a = self.dependency_analyzer.estimate_constraint_cost(a);
682                let cost_b = self.dependency_analyzer.estimate_constraint_cost(b);
683                let selectivity_a = self.dependency_analyzer.estimate_constraint_selectivity(a);
684                let selectivity_b = self.dependency_analyzer.estimate_constraint_selectivity(b);
685
686                // Primary: selectivity (more selective first)
687                // Secondary: cost (lower cost first)
688                selectivity_a
689                    .partial_cmp(&selectivity_b)
690                    .unwrap_or(std::cmp::Ordering::Equal)
691                    .then_with(|| {
692                        cost_a
693                            .partial_cmp(&cost_b)
694                            .unwrap_or(std::cmp::Ordering::Equal)
695                    })
696            });
697
698            optimized.extend(group);
699        }
700
701        // Update metrics
702        if let Ok(mut metrics) = self.metrics.write() {
703            metrics.constraints_reordered += optimized.len();
704        }
705
706        optimized
707    }
708
709    /// Evaluate constraints without caching (for comparison)
710    fn evaluate_without_cache(
711        &self,
712        store: &dyn Store,
713        constraints_with_contexts: Vec<(Constraint, ConstraintContext)>,
714    ) -> Result<Vec<ConstraintEvaluationResult>> {
715        let mut results = Vec::new();
716
717        for (constraint, context) in constraints_with_contexts {
718            let result = constraint.evaluate(store, &context)?;
719            results.push(result);
720        }
721
722        Ok(results)
723    }
724
725    /// Update optimization metrics
726    fn update_metrics(&self, evaluation_count: usize, total_time: Duration) {
727        if let Ok(mut metrics) = self.metrics.write() {
728            metrics.total_evaluations += evaluation_count;
729
730            let cache_stats = self.cache.stats();
731            metrics.cache_hit_rate = cache_stats.hit_rate();
732            metrics.avg_evaluation_time_us = cache_stats.avg_evaluation_time_us;
733
734            // Estimate time saved through optimization
735            let time_per_evaluation = total_time.as_micros() as f64 / evaluation_count as f64;
736            metrics.optimization_time_saved_us +=
737                cache_stats.hits as f64 * time_per_evaluation * 0.8; // Assume 80% time saving from cache hits
738        }
739    }
740
741    /// Get current optimization metrics
742    pub fn get_metrics(&self) -> OptimizationMetrics {
743        self.metrics
744            .read()
745            .expect("metrics lock should not be poisoned")
746            .clone()
747    }
748
749    /// Clear all caches and reset metrics
750    pub fn reset(&mut self) {
751        self.cache.clear();
752        if let Ok(mut metrics) = self.metrics.write() {
753            *metrics = OptimizationMetrics::default();
754        }
755    }
756
757    /// Update configuration
758    pub fn update_config(&mut self, config: OptimizationConfig) {
759        self.config = config;
760        // Note: Some config changes may require recreating components
761    }
762
763    /// Get cache statistics
764    pub fn get_cache_stats(&self) -> CacheStats {
765        self.cache.stats()
766    }
767}
768
/// Advanced constraint evaluation orchestrator: combines cost/selectivity
/// ordering with cached batch evaluation.
#[derive(Debug)]
pub struct AdvancedConstraintEvaluator {
    /// Evaluator (and shared cache) used for all constraint evaluation.
    batch_evaluator: BatchConstraintEvaluator,
    /// Analyzer used to order constraints before evaluation.
    dependency_analyzer: ConstraintDependencyAnalyzer,
    /// Selects the per-constraint cached path in `evaluate_optimized`.
    /// NOTE(review): despite the name, no early termination is currently
    /// performed — all constraints are always evaluated.
    enable_early_termination: bool,
}
776
777impl Default for AdvancedConstraintEvaluator {
778    fn default() -> Self {
779        Self {
780            batch_evaluator: BatchConstraintEvaluator::default(),
781            dependency_analyzer: ConstraintDependencyAnalyzer::default(),
782            enable_early_termination: true,
783        }
784    }
785}
786
787impl AdvancedConstraintEvaluator {
788    /// Create new advanced evaluator with custom configuration
789    pub fn new(
790        cache: ConstraintCache,
791        parallel: bool,
792        batch_size: usize,
793        early_termination: bool,
794    ) -> Self {
795        Self {
796            batch_evaluator: BatchConstraintEvaluator::new(cache, parallel, batch_size),
797            dependency_analyzer: ConstraintDependencyAnalyzer::default(),
798            enable_early_termination: early_termination,
799        }
800    }
801
802    /// Evaluate constraints with advanced optimizations
803    pub fn evaluate_optimized(
804        &self,
805        store: &dyn Store,
806        constraints: Vec<Constraint>,
807        context: ConstraintContext,
808    ) -> Result<Vec<ConstraintEvaluationResult>> {
809        // Optimize constraint order
810        let optimized_constraints = self
811            .dependency_analyzer
812            .optimize_constraint_order(constraints);
813
814        // Prepare constraint-context pairs
815        let constraints_with_contexts: Vec<_> = optimized_constraints
816            .into_iter()
817            .map(|c| (c, context.clone()))
818            .collect();
819
820        // Evaluate with early termination if enabled
821        if self.enable_early_termination {
822            let mut results = Vec::new();
823
824            for (constraint, ctx) in constraints_with_contexts {
825                // Try cache first
826                let result = if let Some(cached) = self.batch_evaluator.cache.get(&constraint, &ctx)
827                {
828                    cached
829                } else {
830                    let start_time = Instant::now();
831                    let result = constraint.evaluate(store, &ctx)?;
832                    let evaluation_time = start_time.elapsed();
833                    self.batch_evaluator.cache.put(
834                        &constraint,
835                        &ctx,
836                        result.clone(),
837                        evaluation_time,
838                    );
839                    result
840                };
841
842                results.push(result.clone());
843
844                // Early termination on first violation for performance
845                if result.is_violated() {
846                    // For the remaining constraints, return "not evaluated" or continue based on use case
847                    // For now, we'll continue evaluating all constraints
848                }
849            }
850
851            Ok(results)
852        } else {
853            // Evaluate all constraints in batches
854            self.batch_evaluator
855                .evaluate_batch(store, constraints_with_contexts)
856        }
857    }
858
859    /// Get performance statistics
860    pub fn get_performance_stats(&self) -> ConstraintPerformanceStats {
861        let cache_stats = self.batch_evaluator.cache_stats();
862        ConstraintPerformanceStats {
863            cache_hit_rate: cache_stats.hit_rate(),
864            total_evaluations: cache_stats.evaluations,
865            avg_evaluation_time_us: cache_stats.avg_evaluation_time_us,
866            cache_evictions: cache_stats.evictions,
867        }
868    }
869}
870
/// Performance statistics for constraint evaluation
///
/// Snapshot of the evaluator's cache counters, suitable for serialization
/// into monitoring/reporting pipelines.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConstraintPerformanceStats {
    /// Cache hit rate as reported by the evaluator's cache statistics
    pub cache_hit_rate: f64,
    /// Total number of constraint evaluations recorded
    pub total_evaluations: usize,
    /// Average evaluation time in microseconds
    pub avg_evaluation_time_us: f64,
    /// Number of cache entries evicted so far
    pub cache_evictions: usize,
}
879
#[cfg(test)]
mod tests {
    use super::*;
    use crate::constraints::{ClassConstraint, DatatypeConstraint, MinCountConstraint};
    use oxirs_core::model::NamedNode;

    /// A freshly created cache must report a miss for a (constraint, context)
    /// pair it has never stored.
    #[test]
    fn test_constraint_cache() {
        let fresh_cache = ConstraintCache::new(100, Duration::from_secs(60));

        let class_constraint = Constraint::Class(ClassConstraint {
            class_iri: NamedNode::new("http://example.org/Person").unwrap(),
        });
        let focus = Term::NamedNode(NamedNode::new("http://example.org/john").unwrap());
        let ctx = ConstraintContext::new(focus, ShapeId::new("PersonShape"));

        assert!(fresh_cache.get(&class_constraint, &ctx).is_none());
    }

    /// The analyzer should order MinCount first (low selectivity), ahead of
    /// the datatype and class constraints.
    #[test]
    fn test_constraint_ordering() {
        let analyzer = ConstraintDependencyAnalyzer::default();

        let unordered = vec![
            Constraint::Class(ClassConstraint {
                class_iri: NamedNode::new("http://example.org/Person").unwrap(),
            }),
            Constraint::MinCount(MinCountConstraint { min_count: 1 }),
            Constraint::Datatype(DatatypeConstraint {
                datatype_iri: NamedNode::new("http://www.w3.org/2001/XMLSchema#string").unwrap(),
            }),
        ];

        let ordered = analyzer.optimize_constraint_order(unordered);
        assert!(matches!(ordered[0], Constraint::MinCount(_)));
    }
}
923
/// Streaming validation engine for large datasets
///
/// Buffers incoming focus nodes into fixed-size batches and validates each
/// batch with a shared `AdvancedConstraintEvaluator`, so arbitrarily large
/// node streams can be processed without holding all nodes in memory.
#[derive(Debug)]
pub struct StreamingValidationEngine {
    /// Batch size for streaming processing (nodes per batch)
    batch_size: usize,
    /// Memory threshold in bytes
    // NOTE(review): stored but never read in this module; the pressure
    // heuristic in `check_memory_pressure` uses cache statistics instead —
    // confirm whether this field should gate it.
    memory_threshold: usize,
    /// Enable memory monitoring
    memory_monitoring: bool,
    /// Advanced constraint evaluator
    evaluator: AdvancedConstraintEvaluator,
}
936
impl Default for StreamingValidationEngine {
    /// Default configuration: 1,000-node batches, 100 MiB memory threshold,
    /// memory monitoring enabled.
    fn default() -> Self {
        Self::new(1000, 100 * 1024 * 1024, true) // 1k batch, 100MB memory limit
    }
}
942
943impl StreamingValidationEngine {
944    /// Create new streaming validation engine
945    pub fn new(batch_size: usize, memory_threshold: usize, memory_monitoring: bool) -> Self {
946        let cache = ConstraintCache::new(10000, Duration::from_secs(300));
947        let evaluator = AdvancedConstraintEvaluator::new(cache, true, batch_size / 4, true);
948
949        Self {
950            batch_size,
951            memory_threshold,
952            memory_monitoring,
953            evaluator,
954        }
955    }
956
957    /// Validate large dataset in streaming fashion
958    pub fn validate_streaming<I>(
959        &self,
960        store: &dyn Store,
961        constraints: Vec<Constraint>,
962        node_stream: I,
963    ) -> Result<StreamingValidationResult>
964    where
965        I: Iterator<Item = Term>,
966    {
967        let mut result = StreamingValidationResult::new();
968        let mut current_batch = Vec::new();
969        let mut processed_count = 0;
970
971        for node in node_stream {
972            current_batch.push(node);
973
974            // Process batch when full
975            if current_batch.len() >= self.batch_size {
976                let batch_result = self.process_batch(store, &constraints, &current_batch)?;
977                result.merge_batch_result(batch_result);
978
979                processed_count += current_batch.len();
980                current_batch.clear();
981
982                // Memory monitoring
983                if self.memory_monitoring && self.check_memory_pressure()? {
984                    result.memory_pressure_events += 1;
985
986                    // Trigger garbage collection or cache eviction
987                    self.evaluator.batch_evaluator.cache.clear();
988
989                    // Could also implement memory spill-to-disk here
990                    tracing::warn!("Memory pressure detected, cleared cache");
991                }
992
993                // Progress reporting
994                if processed_count % (self.batch_size * 10) == 0 {
995                    tracing::info!("Processed {} nodes", processed_count);
996                }
997            }
998        }
999
1000        // Process remaining batch
1001        if !current_batch.is_empty() {
1002            let batch_result = self.process_batch(store, &constraints, &current_batch)?;
1003            result.merge_batch_result(batch_result);
1004        }
1005
1006        result.total_nodes = processed_count + current_batch.len();
1007        Ok(result)
1008    }
1009
1010    /// Process a single batch of nodes
1011    fn process_batch(
1012        &self,
1013        store: &dyn Store,
1014        constraints: &[Constraint],
1015        nodes: &[Term],
1016    ) -> Result<BatchValidationResult> {
1017        let mut batch_result = BatchValidationResult::new();
1018        let start_time = Instant::now();
1019
1020        for node in nodes {
1021            let context = ConstraintContext::new(node.clone(), ShapeId::new("BatchValidation"));
1022
1023            let constraint_results =
1024                self.evaluator
1025                    .evaluate_optimized(store, constraints.to_vec(), context)?;
1026
1027            // Count violations in this batch
1028            let violations = constraint_results
1029                .iter()
1030                .filter(|r| r.is_violated())
1031                .count();
1032            batch_result.violation_count += violations;
1033            batch_result.node_count += 1;
1034        }
1035
1036        batch_result.processing_time = start_time.elapsed();
1037        Ok(batch_result)
1038    }
1039
1040    /// Check if memory usage is approaching threshold
1041    fn check_memory_pressure(&self) -> Result<bool> {
1042        if !self.memory_monitoring {
1043            return Ok(false);
1044        }
1045
1046        // Simple memory check - in practice would use more sophisticated monitoring
1047        let stats = self.evaluator.get_performance_stats();
1048
1049        // Heuristic: if we have many cache evictions, we're under memory pressure
1050        Ok(stats.cache_evictions > 100 && stats.cache_hit_rate < 0.5)
1051    }
1052}
1053
/// Result of streaming validation
#[derive(Debug, Clone)]
pub struct StreamingValidationResult {
    /// Total number of nodes consumed from the input stream
    pub total_nodes: usize,
    /// Total violations found across all batches
    pub total_violations: usize,
    /// Sum of per-batch processing times
    pub total_processing_time: Duration,
    /// Number of times memory pressure triggered a cache clear
    pub memory_pressure_events: usize,
    /// Number of batches processed (including a final partial batch)
    pub batches_processed: usize,
}
1063
1064impl StreamingValidationResult {
1065    fn new() -> Self {
1066        Self {
1067            total_nodes: 0,
1068            total_violations: 0,
1069            total_processing_time: Duration::ZERO,
1070            memory_pressure_events: 0,
1071            batches_processed: 0,
1072        }
1073    }
1074
1075    fn merge_batch_result(&mut self, batch: BatchValidationResult) {
1076        self.total_violations += batch.violation_count;
1077        self.total_processing_time += batch.processing_time;
1078        self.batches_processed += 1;
1079    }
1080}
1081
/// Result of processing a single batch
#[derive(Debug, Clone)]
struct BatchValidationResult {
    /// Number of nodes validated in this batch
    pub node_count: usize,
    /// Number of violated constraint results in this batch
    pub violation_count: usize,
    /// Wall-clock time spent on this batch
    pub processing_time: Duration,
}
1089
1090impl BatchValidationResult {
1091    fn new() -> Self {
1092        Self {
1093            node_count: 0,
1094            violation_count: 0,
1095            processing_time: Duration::ZERO,
1096        }
1097    }
1098}
1099
/// Incremental validation engine for change-based validation
///
/// Keeps a snapshot for each previously-validated node so that subsequent
/// runs only revalidate nodes whose constraints or properties changed.
#[derive(Debug)]
pub struct IncrementalValidationEngine {
    /// Cache for previous validation results, keyed by the validated node
    previous_results: Arc<RwLock<HashMap<Term, ValidationSnapshot>>>,
    /// Advanced evaluator
    evaluator: AdvancedConstraintEvaluator,
    /// Change detection sensitivity
    // NOTE(review): never read in this module — `hash_node_properties`
    // always hashes outgoing + incoming triples regardless of level; confirm
    // whether this setting should select the hashing strategy.
    change_detection_level: ChangeDetectionLevel,
}
1110
/// Validation snapshot for incremental processing
///
/// Captures the inputs (constraint + property hashes) and outputs of one
/// validation pass over a node, so later runs can skip unchanged nodes.
#[derive(Debug, Clone)]
struct ValidationSnapshot {
    /// Node that was validated (also the key in `previous_results`)
    node: Term,
    /// Hash of constraints that were applied
    constraints_hash: u64,
    /// Hash of the node's properties at validation time
    properties_hash: u64,
    /// Validation result
    // NOTE(review): stored but not read back in this module — presumably
    // kept for future resolved-violation diffing; confirm.
    result: Vec<ConstraintEvaluationResult>,
    /// Timestamp of validation
    validated_at: Instant,
}
1125
/// Level of change detection for incremental validation
///
/// Controls how aggressively the incremental engine looks for changes when
/// deciding whether a node must be revalidated.
///
/// The enum is data-less configuration, so it derives `Copy`, `PartialEq`,
/// and `Eq`: levels can be passed by value and compared with `==`/`match`
/// without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChangeDetectionLevel {
    /// Only detect if node identity changed
    NodeOnly,
    /// Detect changes in immediate properties
    Properties,
    /// Detect changes in entire subgraph
    SubGraph,
}
1136
1137impl Default for IncrementalValidationEngine {
1138    fn default() -> Self {
1139        let cache = ConstraintCache::new(50000, Duration::from_secs(3600)); // Larger cache for incremental
1140        let evaluator = AdvancedConstraintEvaluator::new(cache, true, 100, false);
1141
1142        Self {
1143            previous_results: Arc::new(RwLock::new(HashMap::new())),
1144            evaluator,
1145            change_detection_level: ChangeDetectionLevel::Properties,
1146        }
1147    }
1148}
1149
1150impl IncrementalValidationEngine {
1151    /// Validate only changed nodes since last validation
1152    pub fn validate_incremental(
1153        &mut self,
1154        store: &dyn Store,
1155        constraints: Vec<Constraint>,
1156        nodes: &[Term],
1157        force_revalidate: bool,
1158    ) -> Result<IncrementalValidationResult> {
1159        let mut result = IncrementalValidationResult::new();
1160        let start_time = Instant::now();
1161
1162        let constraints_hash = self.hash_constraints(&constraints);
1163
1164        for node in nodes {
1165            let properties_hash = self.hash_node_properties(store, node)?;
1166
1167            let needs_validation = force_revalidate || {
1168                let previous_results = self.previous_results.read().unwrap();
1169                match previous_results.get(node) {
1170                    Some(snapshot) => {
1171                        // Check if constraints or properties changed
1172                        snapshot.constraints_hash != constraints_hash
1173                            || snapshot.properties_hash != properties_hash
1174                    }
1175                    None => true, // Never validated before
1176                }
1177            };
1178
1179            if needs_validation {
1180                let context =
1181                    ConstraintContext::new(node.clone(), ShapeId::new("IncrementalValidation"));
1182
1183                let constraint_results =
1184                    self.evaluator
1185                        .evaluate_optimized(store, constraints.clone(), context)?;
1186
1187                // Update snapshot
1188                let snapshot = ValidationSnapshot {
1189                    node: node.clone(),
1190                    constraints_hash,
1191                    properties_hash,
1192                    result: constraint_results.clone(),
1193                    validated_at: Instant::now(),
1194                };
1195
1196                {
1197                    let mut previous_results = self.previous_results.write().unwrap();
1198                    previous_results.insert(node.clone(), snapshot);
1199                }
1200
1201                let violations = constraint_results
1202                    .iter()
1203                    .filter(|r| r.is_violated())
1204                    .count();
1205                result.revalidated_nodes += 1;
1206                result.new_violations += violations;
1207            } else {
1208                result.skipped_nodes += 1;
1209            }
1210        }
1211
1212        result.total_processing_time = start_time.elapsed();
1213        Ok(result)
1214    }
1215
1216    /// Hash constraints for change detection
1217    fn hash_constraints(&self, constraints: &[Constraint]) -> u64 {
1218        use std::collections::hash_map::DefaultHasher;
1219        let mut hasher = DefaultHasher::new();
1220        for constraint in constraints {
1221            format!("{constraint:?}").hash(&mut hasher);
1222        }
1223        hasher.finish()
1224    }
1225
1226    /// Hash node properties for change detection with comprehensive RDF triple analysis
1227    fn hash_node_properties(&self, store: &dyn Store, node: &Term) -> Result<u64> {
1228        use std::collections::hash_map::DefaultHasher;
1229        use std::hash::Hash;
1230        let mut hasher = DefaultHasher::new();
1231
1232        // Hash all triples where this node is the subject
1233        match self.query_node_triples_as_subject(store, node) {
1234            Ok(subject_triples) => {
1235                for triple in subject_triples {
1236                    triple.subject().as_str().hash(&mut hasher);
1237                    triple.predicate().as_str().hash(&mut hasher);
1238                    triple.object().as_str().hash(&mut hasher);
1239                }
1240            }
1241            Err(_) => {
1242                // Fallback to node-only hash if store query fails
1243                node.as_str().hash(&mut hasher);
1244            }
1245        }
1246
1247        // Hash all triples where this node is the object (to detect incoming references)
1248        match self.query_node_triples_as_object(store, node) {
1249            Ok(object_triples) => {
1250                for triple in object_triples {
1251                    triple.subject().as_str().hash(&mut hasher);
1252                    triple.predicate().as_str().hash(&mut hasher);
1253                    triple.object().as_str().hash(&mut hasher);
1254                }
1255            }
1256            Err(_) => {
1257                // Continue without incoming reference detection if query fails
1258            }
1259        }
1260
1261        Ok(hasher.finish())
1262    }
1263
1264    /// Query store for all triples where the node is the subject
1265    fn query_node_triples_as_subject(
1266        &self,
1267        store: &dyn Store,
1268        node: &Term,
1269    ) -> Result<Vec<oxirs_core::model::Triple>> {
1270        let mut triples = Vec::new();
1271
1272        // Create a pattern to match triples with this node as subject
1273        let subject = match node {
1274            Term::NamedNode(nn) => Some(oxirs_core::model::Subject::NamedNode(nn.clone())),
1275            Term::BlankNode(bn) => Some(oxirs_core::model::Subject::BlankNode(bn.clone())),
1276            Term::Variable(v) => Some(oxirs_core::model::Subject::Variable(v.clone())),
1277            _ => None,
1278        };
1279        let quads = match subject {
1280            Some(s) => store.find_quads(Some(&s), None, None, None)?,
1281            None => Vec::new(),
1282        };
1283        for quad in quads {
1284            let triple = oxirs_core::model::Triple::new(
1285                quad.subject().clone(),
1286                quad.predicate().clone(),
1287                quad.object().clone(),
1288            );
1289            triples.push(triple);
1290        }
1291
1292        Ok(triples)
1293    }
1294
1295    /// Query store for all triples where the node is the object
1296    fn query_node_triples_as_object(
1297        &self,
1298        store: &dyn Store,
1299        node: &Term,
1300    ) -> Result<Vec<oxirs_core::model::Triple>> {
1301        let mut triples = Vec::new();
1302
1303        // Create a pattern to match triples with this node as object
1304        let object = match node {
1305            Term::NamedNode(nn) => Some(oxirs_core::model::Object::NamedNode(nn.clone())),
1306            Term::BlankNode(bn) => Some(oxirs_core::model::Object::BlankNode(bn.clone())),
1307            Term::Literal(lit) => Some(oxirs_core::model::Object::Literal(lit.clone())),
1308            Term::Variable(v) => Some(oxirs_core::model::Object::Variable(v.clone())),
1309            _ => None,
1310        };
1311        let quads = match object {
1312            Some(o) => store.find_quads(None, None, Some(&o), None)?,
1313            None => Vec::new(),
1314        };
1315        for quad in quads {
1316            let triple = oxirs_core::model::Triple::new(
1317                quad.subject().clone(),
1318                quad.predicate().clone(),
1319                quad.object().clone(),
1320            );
1321            triples.push(triple);
1322        }
1323
1324        Ok(triples)
1325    }
1326
1327    /// Clear validation history
1328    pub fn clear_history(&mut self) {
1329        self.previous_results.write().unwrap().clear();
1330    }
1331
1332    /// Get statistics about incremental validation
1333    pub fn get_incremental_stats(&self) -> IncrementalValidationStats {
1334        let snapshots = self.previous_results.read().unwrap();
1335        IncrementalValidationStats {
1336            cached_validations: snapshots.len(),
1337            memory_usage_mb: snapshots.len() * std::mem::size_of::<ValidationSnapshot>()
1338                / (1024 * 1024),
1339        }
1340    }
1341
1342    /// Compute detailed change delta between current state and cached snapshots
1343    pub fn compute_change_delta(
1344        &self,
1345        store: &dyn Store,
1346        current_constraints: &[Constraint],
1347        nodes: &[Term],
1348    ) -> Result<ChangesDelta> {
1349        let mut delta = ChangesDelta::new();
1350        let snapshots = self.previous_results.read().unwrap();
1351
1352        for node in nodes {
1353            // Check if we have a previous snapshot for this node
1354            if let Some(previous_snapshot) = snapshots.get(node) {
1355                // Compute current hashes
1356                let current_property_hash = self.hash_node_properties(store, node)?;
1357                let current_constraint_hash = self.hash_constraints(current_constraints);
1358
1359                // Check for property changes
1360                if current_property_hash != previous_snapshot.properties_hash {
1361                    delta.nodes_with_property_changes.push(NodePropertyChange {
1362                        node: node.clone(),
1363                        previous_hash: previous_snapshot.properties_hash,
1364                        current_hash: current_property_hash,
1365                        property_changes: self.compute_property_changes(store, node)?,
1366                        detected_at: std::time::SystemTime::now(),
1367                    });
1368                }
1369
1370                // Check for constraint changes
1371                if current_constraint_hash != previous_snapshot.constraints_hash {
1372                    delta
1373                        .nodes_with_constraint_changes
1374                        .push(NodeConstraintChange {
1375                            node: node.clone(),
1376                            previous_constraints_hash: previous_snapshot.constraints_hash,
1377                            current_constraints_hash: current_constraint_hash,
1378                            changed_shapes: vec![], // Could be enhanced to track specific shape changes
1379                            detected_at: std::time::SystemTime::now(),
1380                        });
1381                }
1382            } else {
1383                // New node detected
1384                delta.new_nodes.push(node.clone());
1385            }
1386        }
1387
1388        // Detect deleted nodes (in snapshots but not in current nodes)
1389        let current_nodes: std::collections::HashSet<&Term> = nodes.iter().collect();
1390
1391        for snapshot_node in snapshots.keys() {
1392            if !current_nodes.contains(snapshot_node) {
1393                delta.deleted_nodes.push(snapshot_node.clone());
1394            }
1395        }
1396
1397        Ok(delta)
1398    }
1399
1400    /// Generate change events for external system integration
1401    pub fn generate_change_events(
1402        &self,
1403        delta: &ChangesDelta,
1404        _validation_results: &[crate::constraints::ConstraintEvaluationResult],
1405    ) -> Vec<ChangeEvent> {
1406        let mut events = Vec::new();
1407        let timestamp = std::time::SystemTime::now();
1408
1409        // Generate events for property changes
1410        for property_change in &delta.nodes_with_property_changes {
1411            let event_id = format!(
1412                "prop_change_{}_{}",
1413                property_change.node.as_str(),
1414                timestamp
1415                    .duration_since(std::time::UNIX_EPOCH)
1416                    .unwrap()
1417                    .as_millis()
1418            );
1419
1420            let payload = serde_json::json!({
1421                "node": property_change.node.as_str(),
1422                "previous_hash": property_change.previous_hash,
1423                "current_hash": property_change.current_hash,
1424                "detected_at": property_change.detected_at
1425                    .duration_since(std::time::UNIX_EPOCH)
1426                    .unwrap()
1427                    .as_secs()
1428            });
1429
1430            events.push(ChangeEvent {
1431                event_type: ChangeEventType::NodePropertiesChanged,
1432                node: property_change.node.clone(),
1433                shape_context: None,
1434                payload,
1435                timestamp,
1436                event_id,
1437            });
1438        }
1439
1440        // Generate events for constraint changes
1441        for constraint_change in &delta.nodes_with_constraint_changes {
1442            let event_id = format!(
1443                "constraint_change_{}_{}",
1444                constraint_change.node.as_str(),
1445                timestamp
1446                    .duration_since(std::time::UNIX_EPOCH)
1447                    .unwrap()
1448                    .as_millis()
1449            );
1450
1451            let payload = serde_json::json!({
1452                "node": constraint_change.node.as_str(),
1453                "previous_constraints_hash": constraint_change.previous_constraints_hash,
1454                "current_constraints_hash": constraint_change.current_constraints_hash,
1455                "changed_shapes": constraint_change.changed_shapes
1456            });
1457
1458            events.push(ChangeEvent {
1459                event_type: ChangeEventType::ShapeConstraintsChanged,
1460                node: constraint_change.node.clone(),
1461                shape_context: None,
1462                payload,
1463                timestamp,
1464                event_id,
1465            });
1466        }
1467
1468        // Generate events for new nodes
1469        for new_node in &delta.new_nodes {
1470            let event_id = format!(
1471                "node_added_{}_{}",
1472                new_node.as_str(),
1473                timestamp
1474                    .duration_since(std::time::UNIX_EPOCH)
1475                    .unwrap()
1476                    .as_millis()
1477            );
1478
1479            let payload = serde_json::json!({
1480                "node": new_node.as_str(),
1481                "detected_at": timestamp
1482                    .duration_since(std::time::UNIX_EPOCH)
1483                    .unwrap()
1484                    .as_secs()
1485            });
1486
1487            events.push(ChangeEvent {
1488                event_type: ChangeEventType::NodeAdded,
1489                node: new_node.clone(),
1490                shape_context: None,
1491                payload,
1492                timestamp,
1493                event_id,
1494            });
1495        }
1496
1497        // Generate events for deleted nodes
1498        for deleted_node in &delta.deleted_nodes {
1499            let event_id = format!(
1500                "node_removed_{}_{}",
1501                deleted_node.as_str(),
1502                timestamp
1503                    .duration_since(std::time::UNIX_EPOCH)
1504                    .unwrap()
1505                    .as_millis()
1506            );
1507
1508            let payload = serde_json::json!({
1509                "node": deleted_node.as_str(),
1510                "detected_at": timestamp
1511                    .duration_since(std::time::UNIX_EPOCH)
1512                    .unwrap()
1513                    .as_secs()
1514            });
1515
1516            events.push(ChangeEvent {
1517                event_type: ChangeEventType::NodeRemoved,
1518                node: deleted_node.clone(),
1519                shape_context: None,
1520                payload,
1521                timestamp,
1522                event_id,
1523            });
1524        }
1525
1526        events
1527    }
1528
1529    /// Compute specific property changes for a node (simplified implementation)
1530    fn compute_property_changes(
1531        &self,
1532        store: &dyn Store,
1533        node: &Term,
1534    ) -> Result<Vec<PropertyChange>> {
1535        // This is a simplified implementation that could be enhanced with:
1536        // 1. Actual before/after triple comparison
1537        // 2. Property-specific change detection
1538        // 3. Integration with store change logs
1539
1540        let mut changes = Vec::new();
1541        let current_triples = self.query_node_triples_as_subject(store, node)?;
1542
1543        // For each triple, we could compare with cached previous state
1544        // For now, we'll create a placeholder change indicating some property changed
1545        if !current_triples.is_empty() {
1546            for triple in current_triples.iter().take(5) {
1547                // Limit for performance
1548                // Convert predicate to NamedNode and object to Term
1549                if let oxirs_core::model::Predicate::NamedNode(predicate_nn) = triple.predicate() {
1550                    changes.push(PropertyChange {
1551                        subject: node.clone(),
1552                        property: predicate_nn.clone(),
1553                        change_type: PropertyChangeType::Modified, // Simplified assumption
1554                        old_value: None, // Would need previous state comparison
1555                        new_value: Some(match triple.object() {
1556                            oxirs_core::model::Object::NamedNode(nn) => Term::NamedNode(nn.clone()),
1557                            oxirs_core::model::Object::BlankNode(bn) => Term::BlankNode(bn.clone()),
1558                            oxirs_core::model::Object::Literal(lit) => Term::Literal(lit.clone()),
1559                            oxirs_core::model::Object::Variable(v) => Term::Variable(v.clone()),
1560                            _ => continue, // Skip unsupported object types
1561                        }),
1562                        timestamp: std::time::SystemTime::now(),
1563                    });
1564                }
1565            }
1566        }
1567
1568        Ok(changes)
1569    }
1570
1571    /// Reconstruct term from string key (simplified approach)
1572    fn reconstruct_term_from_key(&self, key: &str) -> Result<Term> {
1573        // This is a simplified implementation that could be enhanced
1574        // In practice, you'd want a more robust term serialization/deserialization
1575        if key.starts_with("NamedNode(") {
1576            let iri = key
1577                .trim_start_matches("NamedNode(\"")
1578                .trim_end_matches("\")");
1579            oxirs_core::model::NamedNode::new(iri)
1580                .map(Term::NamedNode)
1581                .map_err(|e| ShaclError::ValidationEngine(format!("Invalid IRI: {e}")))
1582        } else {
1583            Err(ShaclError::ValidationEngine(format!(
1584                "Unsupported term key format: {key}"
1585            )))
1586        }
1587    }
1588}
1589
/// Enhanced result of incremental validation with delta processing
#[derive(Debug, Clone)]
pub struct IncrementalValidationResult {
    /// Nodes that were revalidated due to changes
    pub revalidated_nodes: usize,
    /// Nodes that were skipped (no changes detected)
    pub skipped_nodes: usize,
    /// New violations found during incremental validation
    /// (count of violated constraint results among revalidated nodes)
    pub new_violations: usize,
    /// Total processing time for incremental validation
    pub total_processing_time: Duration,
    /// Detailed change delta information
    // NOTE(review): left empty by `validate_incremental`; presumably filled
    // by callers using `compute_change_delta` — confirm.
    pub change_delta: ChangesDelta,
    /// Specific violations that were resolved (no longer violations)
    // NOTE(review): not populated anywhere in this module — confirm callers.
    pub resolved_violations: Vec<crate::validation::ValidationViolation>,
    /// Specific new violations found
    // NOTE(review): not populated anywhere in this module — confirm callers.
    pub new_violation_details: Vec<crate::validation::ValidationViolation>,
    /// Change events for external systems
    // NOTE(review): see `generate_change_events`; not filled automatically.
    pub change_events: Vec<ChangeEvent>,
}
1610
1611impl IncrementalValidationResult {
1612    fn new() -> Self {
1613        Self {
1614            revalidated_nodes: 0,
1615            skipped_nodes: 0,
1616            new_violations: 0,
1617            total_processing_time: Duration::ZERO,
1618            change_delta: ChangesDelta::new(),
1619            resolved_violations: Vec::new(),
1620            new_violation_details: Vec::new(),
1621            change_events: Vec::new(),
1622        }
1623    }
1624
1625    /// Get the efficiency ratio (percentage of nodes skipped)
1626    pub fn efficiency_ratio(&self) -> f64 {
1627        let total_nodes = self.revalidated_nodes + self.skipped_nodes;
1628        if total_nodes == 0 {
1629            0.0
1630        } else {
1631            self.skipped_nodes as f64 / total_nodes as f64
1632        }
1633    }
1634
1635    /// Get the net change in violations (positive = more violations, negative = fewer)
1636    pub fn net_violation_change(&self) -> i32 {
1637        self.new_violation_details.len() as i32 - self.resolved_violations.len() as i32
1638    }
1639
1640    /// Check if this incremental validation improved overall conformance
1641    pub fn improved_conformance(&self) -> bool {
1642        self.resolved_violations.len() > self.new_violation_details.len()
1643    }
1644
1645    /// Get summary of changes for reporting
1646    pub fn change_summary(&self) -> String {
1647        format!(
1648            "Incremental validation: {} nodes revalidated, {} skipped ({}% efficiency), {} net violation change",
1649            self.revalidated_nodes,
1650            self.skipped_nodes,
1651            (self.efficiency_ratio() * 100.0) as u32,
1652            self.net_violation_change()
1653        )
1654    }
1655}
1656
/// Comprehensive delta information about detected changes
///
/// Produced by `IncrementalValidationEngine::compute_change_delta`; every
/// category is independent and may be empty.
#[derive(Debug, Clone)]
pub struct ChangesDelta {
    /// Nodes that had property changes
    pub nodes_with_property_changes: Vec<NodePropertyChange>,
    /// Nodes that had constraint changes (shape modifications)
    pub nodes_with_constraint_changes: Vec<NodeConstraintChange>,
    /// New nodes detected
    pub new_nodes: Vec<oxirs_core::model::Term>,
    /// Deleted nodes detected
    pub deleted_nodes: Vec<oxirs_core::model::Term>,
    /// Property-level changes
    // NOTE(review): `compute_change_delta` never fills this (per-node detail
    // lives in `nodes_with_property_changes`) — confirm intended use.
    pub property_changes: Vec<PropertyChange>,
}
1671
1672impl ChangesDelta {
1673    fn new() -> Self {
1674        Self {
1675            nodes_with_property_changes: Vec::new(),
1676            nodes_with_constraint_changes: Vec::new(),
1677            new_nodes: Vec::new(),
1678            deleted_nodes: Vec::new(),
1679            property_changes: Vec::new(),
1680        }
1681    }
1682
1683    /// Check if any significant changes were detected
1684    pub fn has_changes(&self) -> bool {
1685        !self.nodes_with_property_changes.is_empty()
1686            || !self.nodes_with_constraint_changes.is_empty()
1687            || !self.new_nodes.is_empty()
1688            || !self.deleted_nodes.is_empty()
1689            || !self.property_changes.is_empty()
1690    }
1691
1692    /// Get total number of changed entities
1693    pub fn total_changes(&self) -> usize {
1694        self.nodes_with_property_changes.len()
1695            + self.nodes_with_constraint_changes.len()
1696            + self.new_nodes.len()
1697            + self.deleted_nodes.len()
1698            + self.property_changes.len()
1699    }
1700}
1701
/// Details about property changes for a specific node.
///
/// Emitted when change detection observes that a node's hash differs
/// between runs; the before/after hash pair identifies the transition.
#[derive(Debug, Clone)]
pub struct NodePropertyChange {
    /// The node that changed
    pub node: oxirs_core::model::Term,
    /// Node hash before the change (presumably over its properties —
    /// confirm against the change-detection code that computes it)
    pub previous_hash: u64,
    /// Node hash after the change
    pub current_hash: u64,
    /// Specific property changes (if available; may be empty when only
    /// hash-level change detection ran)
    pub property_changes: Vec<PropertyChange>,
    /// Wall-clock timestamp of change detection
    pub detected_at: std::time::SystemTime,
}
1716
/// Details about constraint changes affecting a node.
///
/// Produced when the shapes governing a node are modified, so the node
/// must be revalidated even though its own data did not change.
#[derive(Debug, Clone)]
pub struct NodeConstraintChange {
    /// The node affected by constraint changes
    pub node: oxirs_core::model::Term,
    /// Hash of the applicable constraints before the change
    pub previous_constraints_hash: u64,
    /// Hash of the applicable constraints after the change
    pub current_constraints_hash: u64,
    /// Shape IDs whose definitions changed
    pub changed_shapes: Vec<crate::ShapeId>,
    /// Wall-clock timestamp of change detection
    pub detected_at: std::time::SystemTime,
}
1731
/// Specific property-level change information.
///
/// Describes a single triple-level mutation; `old_value`/`new_value` are
/// populated according to `change_type` (see field comments below).
#[derive(Debug, Clone)]
pub struct PropertyChange {
    /// Subject of the changed triple
    pub subject: oxirs_core::model::Term,
    /// Property/predicate that changed
    pub property: oxirs_core::model::NamedNode,
    /// Kind of mutation (added / modified / deleted)
    pub change_type: PropertyChangeType,
    /// Old value — expected for `Modified` and `Deleted`, `None` for `Added`
    pub old_value: Option<oxirs_core::model::Term>,
    /// New value — expected for `Modified` and `Added`, `None` for `Deleted`
    pub new_value: Option<oxirs_core::model::Term>,
    /// Wall-clock timestamp of the change
    pub timestamp: std::time::SystemTime,
}
1748
/// Type of property change.
///
/// Fieldless discriminant for [`PropertyChange::change_type`]. Derives
/// `Copy`, `Eq`, and `Hash` in addition to the previous set so values can
/// be passed by value, compared exhaustively, and used as map/set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PropertyChangeType {
    /// Property value was added
    Added,
    /// Property value was modified
    Modified,
    /// Property value was deleted
    Deleted,
}
1759
/// Change event for external system integration.
///
/// A self-describing notification (type + node + JSON payload) suitable
/// for forwarding to subscribers or event buses; `event_id` allows
/// downstream deduplication/tracking.
#[derive(Debug, Clone)]
pub struct ChangeEvent {
    /// Kind of change this event reports
    pub event_type: ChangeEventType,
    /// Node affected by the change
    pub node: oxirs_core::model::Term,
    /// Shape context, when the event is tied to a specific shape
    pub shape_context: Option<crate::ShapeId>,
    /// Detailed event information as arbitrary JSON
    pub payload: serde_json::Value,
    /// Wall-clock time the event was created
    pub timestamp: std::time::SystemTime,
    /// Unique identifier for tracking this event
    pub event_id: String,
}
1776
/// Types of change events.
///
/// Fieldless discriminant for [`ChangeEvent::event_type`]. Derives
/// `Copy`, `Eq`, and `Hash` in addition to the previous set so values can
/// be passed by value, compared exhaustively, and used as map/set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChangeEventType {
    /// Node validation status changed
    ValidationStatusChanged,
    /// New violation detected
    ViolationAdded,
    /// Violation resolved
    ViolationResolved,
    /// Node properties changed
    NodePropertiesChanged,
    /// Shape constraints changed
    ShapeConstraintsChanged,
    /// New node added to validation scope
    NodeAdded,
    /// Node removed from validation scope
    NodeRemoved,
}
1795
/// Statistics for incremental validation.
///
/// All counters are plain `usize`, so a zeroed instance is available via
/// the derived `Default`.
#[derive(Debug, Clone, Default)]
pub struct IncrementalValidationStats {
    /// Count of cached validation results
    pub cached_validations: usize,
    /// Memory usage attributed to incremental-validation state, in MB
    pub memory_usage_mb: usize,
}