oxirs_shacl_ai/advanced_pattern_mining/
engine.rs1use std::collections::HashMap;
4use tracing::{debug, info, warn};
5
6use oxirs_core::Store;
7
8use super::algorithms::*;
9use super::cache::IntelligentPatternCache;
10use super::patterns::*;
11use super::sparql::*;
12use super::types::*;
13use crate::Result;
14
15#[derive(Debug)]
17pub struct AdvancedPatternMiningEngine {
18 config: AdvancedPatternMiningConfig,
20
21 stats: PatternMiningStats,
23
24 pattern_cache: IntelligentPatternCache,
26
27 frequency_tables: FrequencyTables,
29}
30
/// Frequency counts gathered from the store before itemset discovery.
///
/// Every table maps a string key (or a pair of keys) to an occurrence count.
/// The tables are filled by helper functions defined outside this file, so the
/// exact key format (presumably IRI / lexical strings) should be confirmed
/// against those helpers.
///
/// `Clone`, `PartialEq` and `Eq` are derived so callers that obtain a shared
/// reference can snapshot the tables and compare snapshots between runs.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct FrequencyTables {
    /// Occurrence count per property.
    pub properties: HashMap<String, usize>,

    /// Occurrence count per class.
    pub classes: HashMap<String, usize>,

    /// Occurrence count per extracted value pattern.
    pub value_patterns: HashMap<String, usize>,

    /// Occurrence count per pair of keys (property pairs, judging by the
    /// co-occurrence query that feeds this table — confirm in the helper).
    pub co_occurrence: HashMap<(String, String), usize>,
}
46
47impl AdvancedPatternMiningEngine {
48 pub fn new() -> Self {
50 Self::with_config(AdvancedPatternMiningConfig::default())
51 }
52
53 pub fn with_config(config: AdvancedPatternMiningConfig) -> Self {
55 Self {
56 config,
57 stats: PatternMiningStats::default(),
58 pattern_cache: IntelligentPatternCache::new(),
59 frequency_tables: FrequencyTables::default(),
60 }
61 }
62
63 pub fn mine_patterns(
65 &mut self,
66 store: &dyn Store,
67 graph_name: Option<&str>,
68 ) -> Result<Vec<AdvancedPattern>> {
69 let start_time = std::time::Instant::now();
70 info!("Starting advanced pattern mining");
71
72 self.build_frequency_tables(store, graph_name)?;
74
75 let frequent_itemsets = self.discover_frequent_itemsets()?;
77 debug!("Found {} frequent itemsets", frequent_itemsets.len());
78
79 let mut patterns = self.generate_association_rules(&frequent_itemsets)?;
81 debug!("Generated {} association rules", patterns.len());
82
83 if self.config.enable_temporal_analysis {
85 self.enhance_with_temporal_analysis(&mut patterns, store, graph_name)?;
86 }
87
88 if self.config.enable_hierarchical_patterns {
90 self.analyze_hierarchical_patterns(&mut patterns)?;
91 }
92
93 self.generate_constraint_suggestions(&mut patterns)?;
95
96 patterns.retain(|p| p.quality_score >= self.config.quality_threshold);
98
99 self.stats.total_patterns = patterns.len();
101 self.stats.high_quality_patterns =
102 patterns.iter().filter(|p| p.quality_score >= 0.9).count();
103 self.stats.temporal_patterns = patterns
104 .iter()
105 .filter(|p| p.temporal_info.is_some())
106 .count();
107 self.stats.hierarchical_patterns =
108 patterns.iter().filter(|p| p.hierarchy_level > 0).count();
109 self.stats.processing_time_ms = start_time.elapsed().as_millis() as u64;
110
111 info!(
112 "Pattern mining completed: {} patterns found in {}ms",
113 patterns.len(),
114 self.stats.processing_time_ms
115 );
116
117 Ok(patterns)
118 }
119
120 pub fn get_stats(&self) -> &PatternMiningStats {
122 &self.stats
123 }
124
125 pub fn get_config(&self) -> &AdvancedPatternMiningConfig {
127 &self.config
128 }
129
130 pub fn update_config(&mut self, config: AdvancedPatternMiningConfig) {
132 self.config = config;
133 }
134
135 pub fn get_frequency_tables(&self) -> &FrequencyTables {
137 &self.frequency_tables
138 }
139
140 pub fn clear_cache(&mut self) {
142 self.pattern_cache.clear();
143 }
144
145 pub fn get_cache_stats(&self) -> crate::Result<serde_json::Value> {
147 self.pattern_cache.get_stats()
148 }
149
150 pub fn warm_cache(&mut self) -> usize {
152 0
154 }
155
156 pub fn get_cache_analytics(&self) -> serde_json::Value {
158 self.get_cache_stats().unwrap_or_default()
159 }
160
161 pub fn get_advanced_cache_statistics(&self) -> serde_json::Value {
163 self.get_cache_stats().unwrap_or_default()
164 }
165
166 pub fn evaluate_cache_strategy(&self) -> bool {
168 false
170 }
171
172 pub fn get_cache_recommendations(&self) -> Vec<String> {
174 vec![]
176 }
177
178 pub fn get_cache_eviction_strategy(&self) -> String {
180 "LRU".to_string()
182 }
183
184 pub fn mine_sequential_patterns(
186 &mut self,
187 store: &dyn Store,
188 graph_name: Option<&str>,
189 _min_support: f64,
190 ) -> crate::Result<Vec<AdvancedPattern>> {
191 self.mine_patterns(store, graph_name)
193 }
194
195 pub fn mine_graph_patterns(
197 &mut self,
198 store: &dyn Store,
199 graph_name: Option<&str>,
200 _max_size: usize,
201 ) -> crate::Result<Vec<AdvancedPattern>> {
202 self.mine_patterns(store, graph_name)
204 }
205
206 pub fn mine_enhanced_temporal_patterns(
208 &mut self,
209 store: &dyn Store,
210 graph_name: Option<&str>,
211 _granularity: crate::advanced_pattern_mining::TimeGranularity,
212 ) -> crate::Result<Vec<AdvancedPattern>> {
213 self.mine_patterns(store, graph_name)
215 }
216
217 pub fn rank_patterns_advanced(
219 &self,
220 patterns: &mut [AdvancedPattern],
221 _criteria: &crate::advanced_pattern_mining::PatternRankingCriteria,
222 ) -> Vec<f64> {
223 patterns.iter().map(|p| p.quality_score).collect()
225 }
226
227 pub fn perform_enhanced_statistical_analysis(
229 &self,
230 patterns: &[AdvancedPattern],
231 ) -> serde_json::Value {
232 serde_json::json!({
234 "total_patterns": patterns.len(),
235 "average_quality": patterns.iter().map(|p| p.quality_score).sum::<f64>() / patterns.len() as f64
236 })
237 }
238
239 pub fn get_cached_patterns(&self, _cache_key: &str) -> Option<Vec<AdvancedPattern>> {
241 None
243 }
244
245 pub fn cache_patterns(&mut self, _cache_key: String, _patterns: Vec<AdvancedPattern>) {
247 }
249
250 fn build_frequency_tables(
252 &mut self,
253 store: &dyn Store,
254 graph_name: Option<&str>,
255 ) -> Result<()> {
256 debug!("Building frequency tables from RDF store");
257
258 self.analyze_property_frequencies(store, graph_name)?;
260 self.analyze_class_frequencies(store, graph_name)?;
261 self.analyze_value_pattern_frequencies(store, graph_name)?;
262 self.build_co_occurrence_matrix(store, graph_name)?;
263
264 debug!(
265 "Built frequency tables: {} properties, {} classes, {} value patterns",
266 self.frequency_tables.properties.len(),
267 self.frequency_tables.classes.len(),
268 self.frequency_tables.value_patterns.len()
269 );
270
271 Ok(())
272 }
273
274 fn analyze_property_frequencies(
276 &mut self,
277 store: &dyn Store,
278 graph_name: Option<&str>,
279 ) -> Result<()> {
280 debug!("Analyzing property frequencies");
281
282 let query = r#"
284 SELECT ?property (COUNT(*) as ?count) WHERE {
285 ?subject ?property ?object .
286 } GROUP BY ?property
287 ORDER BY DESC(?count)
288 "#;
289
290 match execute_sparql_query(store, query, graph_name) {
292 Ok(results) => {
293 process_property_frequency_results(&mut self.frequency_tables, results)?;
294 }
295 Err(e) => {
296 warn!(
297 "Failed to execute property frequency query: {}, using fallback analysis",
298 e
299 );
300 fallback_property_analysis(&mut self.frequency_tables, store, graph_name)?;
301 }
302 }
303
304 Ok(())
305 }
306
307 fn analyze_class_frequencies(
309 &mut self,
310 store: &dyn Store,
311 graph_name: Option<&str>,
312 ) -> Result<()> {
313 debug!("Analyzing class frequencies");
314
315 let query = r#"
316 SELECT ?class (COUNT(DISTINCT ?instance) as ?count) WHERE {
317 ?instance a ?class .
318 } GROUP BY ?class
319 ORDER BY DESC(?count)
320 "#;
321
322 match execute_sparql_query(store, query, graph_name) {
323 Ok(results) => {
324 process_class_frequency_results(&mut self.frequency_tables, results)?;
325 }
326 Err(e) => {
327 warn!(
328 "Failed to execute class frequency query: {}, using fallback analysis",
329 e
330 );
331 fallback_class_analysis(&mut self.frequency_tables, store, graph_name)?;
332 }
333 }
334
335 Ok(())
336 }
337
338 fn analyze_value_pattern_frequencies(
340 &mut self,
341 store: &dyn Store,
342 graph_name: Option<&str>,
343 ) -> Result<()> {
344 debug!("Analyzing value pattern frequencies");
345
346 let query = r#"
347 SELECT ?pattern (COUNT(*) as ?count) WHERE {
348 ?subject ?property ?object .
349 BIND(REPLACE(STR(?object), "^(.*?)(\\d+|[a-zA-Z]+).*$", "$2") AS ?pattern)
350 FILTER(STRLEN(?pattern) > 0)
351 } GROUP BY ?pattern
352 HAVING (?count > 10)
353 ORDER BY DESC(?count)
354 "#;
355
356 match execute_sparql_query(store, query, graph_name) {
357 Ok(results) => {
358 process_value_pattern_results(&mut self.frequency_tables, results)?;
359 }
360 Err(e) => {
361 warn!(
362 "Failed to execute value pattern query: {}, using fallback analysis",
363 e
364 );
365 fallback_value_pattern_analysis(&mut self.frequency_tables, store, graph_name)?;
366 }
367 }
368
369 Ok(())
370 }
371
372 fn build_co_occurrence_matrix(
374 &mut self,
375 store: &dyn Store,
376 graph_name: Option<&str>,
377 ) -> Result<()> {
378 debug!("Building co-occurrence matrix");
379
380 let query = r#"
381 SELECT ?prop1 ?prop2 (COUNT(*) as ?count) WHERE {
382 ?subject ?prop1 ?obj1 .
383 ?subject ?prop2 ?obj2 .
384 FILTER(?prop1 != ?prop2)
385 } GROUP BY ?prop1 ?prop2
386 HAVING (?count > 5)
387 ORDER BY DESC(?count)
388 "#;
389
390 match execute_sparql_query(store, query, graph_name) {
391 Ok(results) => {
392 process_co_occurrence_results(&mut self.frequency_tables, results)?;
393 }
394 Err(e) => {
395 warn!(
396 "Failed to execute co-occurrence query: {}, using fallback analysis",
397 e
398 );
399 fallback_co_occurrence_analysis(&mut self.frequency_tables, store, graph_name)?;
400 }
401 }
402
403 Ok(())
404 }
405
406 fn discover_frequent_itemsets(&self) -> Result<Vec<Vec<String>>> {
408 discover_frequent_itemsets(&self.frequency_tables, &self.config)
409 }
410
411 fn generate_association_rules(
413 &self,
414 frequent_itemsets: &[Vec<String>],
415 ) -> Result<Vec<AdvancedPattern>> {
416 generate_association_rules(frequent_itemsets, &self.frequency_tables, &self.config)
417 }
418
419 fn enhance_with_temporal_analysis(
421 &self,
422 patterns: &mut [AdvancedPattern],
423 store: &dyn Store,
424 graph_name: Option<&str>,
425 ) -> Result<()> {
426 enhance_with_temporal_analysis(patterns, store, graph_name, &self.config)
427 }
428
429 fn analyze_hierarchical_patterns(&self, patterns: &mut [AdvancedPattern]) -> Result<()> {
431 analyze_hierarchical_patterns(patterns, &self.frequency_tables, &self.config)
432 }
433
434 fn generate_constraint_suggestions(&self, patterns: &mut [AdvancedPattern]) -> Result<()> {
436 generate_constraint_suggestions(patterns, &self.config)
437 }
438}
439
440impl Default for AdvancedPatternMiningEngine {
441 fn default() -> Self {
442 Self::new()
443 }
444}