1use std::collections::{HashMap, HashSet};
7
8use crate::SymbolTable;
9
10#[derive(Clone, Debug)]
12pub struct SchemaStatistics {
13 pub domain_count: usize,
15 pub predicate_count: usize,
17 pub variable_count: usize,
19 pub total_cardinality: usize,
21 pub avg_cardinality: f64,
23 pub max_cardinality: usize,
25 pub min_cardinality: usize,
27 pub arity_distribution: HashMap<usize, usize>,
29 pub domain_usage_frequency: HashMap<String, usize>,
31 pub unused_domains: Vec<String>,
33 pub predicates_by_arity: HashMap<usize, Vec<String>>,
35}
36
37impl SchemaStatistics {
38 pub fn compute(table: &SymbolTable) -> Self {
57 let domain_count = table.domains.len();
58 let predicate_count = table.predicates.len();
59 let variable_count = table.variables.len();
60
61 let cardinalities: Vec<usize> = table.domains.values().map(|d| d.cardinality).collect();
63 let total_cardinality: usize = cardinalities.iter().sum();
64 let avg_cardinality = if domain_count > 0 {
65 total_cardinality as f64 / domain_count as f64
66 } else {
67 0.0
68 };
69 let max_cardinality = cardinalities.iter().copied().max().unwrap_or(0);
70 let min_cardinality = cardinalities.iter().copied().min().unwrap_or(0);
71
72 let mut arity_distribution = HashMap::new();
74 let mut predicates_by_arity: HashMap<usize, Vec<String>> = HashMap::new();
75 for (name, pred) in &table.predicates {
76 let arity = pred.arg_domains.len();
77 *arity_distribution.entry(arity).or_insert(0) += 1;
78 predicates_by_arity
79 .entry(arity)
80 .or_default()
81 .push(name.clone());
82 }
83
84 let mut domain_usage_frequency = HashMap::new();
86 for pred in table.predicates.values() {
87 for domain in &pred.arg_domains {
88 *domain_usage_frequency.entry(domain.clone()).or_insert(0) += 1;
89 }
90 }
91
92 let used_domains: HashSet<_> = domain_usage_frequency.keys().cloned().collect();
94 let unused_domains: Vec<String> = table
95 .domains
96 .keys()
97 .filter(|d| !used_domains.contains(*d))
98 .cloned()
99 .collect();
100
101 Self {
102 domain_count,
103 predicate_count,
104 variable_count,
105 total_cardinality,
106 avg_cardinality,
107 max_cardinality,
108 min_cardinality,
109 arity_distribution,
110 domain_usage_frequency,
111 unused_domains,
112 predicates_by_arity,
113 }
114 }
115
116 pub fn most_used_domains(&self, n: usize) -> Vec<(String, usize)> {
118 let mut usage: Vec<_> = self.domain_usage_frequency.iter().collect();
119 usage.sort_by(|a, b| b.1.cmp(a.1));
120 usage
121 .into_iter()
122 .take(n)
123 .map(|(d, &count)| (d.clone(), count))
124 .collect()
125 }
126
127 pub fn least_used_domains(&self, n: usize) -> Vec<(String, usize)> {
129 let mut usage: Vec<_> = self.domain_usage_frequency.iter().collect();
130 usage.sort_by(|a, b| a.1.cmp(b.1));
131 usage
132 .into_iter()
133 .take(n)
134 .map(|(d, &count)| (d.clone(), count))
135 .collect()
136 }
137
138 pub fn complexity_score(&self) -> f64 {
146 let domain_factor = self.domain_count as f64;
147 let predicate_factor = self.predicate_count as f64;
148 let arity_diversity = self.arity_distribution.len() as f64;
149 let usage_variance = self.compute_usage_variance();
150
151 domain_factor * 0.2 + predicate_factor * 0.3 + arity_diversity * 0.2 + usage_variance * 0.3
153 }
154
155 fn compute_usage_variance(&self) -> f64 {
156 if self.domain_usage_frequency.is_empty() {
157 return 0.0;
158 }
159
160 let counts: Vec<f64> = self
161 .domain_usage_frequency
162 .values()
163 .map(|&c| c as f64)
164 .collect();
165 let mean = counts.iter().sum::<f64>() / counts.len() as f64;
166 let variance = counts.iter().map(|c| (c - mean).powi(2)).sum::<f64>() / counts.len() as f64;
167 variance.sqrt()
168 }
169}
170
171#[derive(Clone, Debug)]
173pub struct SchemaRecommendations {
174 pub issues: Vec<SchemaIssue>,
176 pub suggestions: Vec<String>,
178}
179
/// A single problem detected in a schema.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SchemaIssue {
    /// A domain is defined but never used; carries the domain name.
    UnusedDomain(String),
    /// A domain has zero cardinality; carries the domain name.
    ZeroCardinalityDomain(String),
    /// A domain has very high cardinality; carries the domain name and its cardinality.
    HighCardinalityDomain(String, usize),
    /// A predicate has high arity; carries the predicate name and its arity.
    HighArityPredicate(String, usize),
    /// The schema defines no predicates.
    NoPredicates,
    /// The schema defines no domains.
    NoDomains,
}

impl SchemaIssue {
    /// Renders the issue as a human-readable sentence.
    pub fn description(&self) -> String {
        match self {
            // Schema-wide issues carry no payload, so their text is fixed.
            SchemaIssue::NoDomains => String::from("Schema has no domains defined"),
            SchemaIssue::NoPredicates => String::from("Schema has no predicates defined"),

            // Domain-level issues.
            SchemaIssue::UnusedDomain(domain) => {
                format!("Domain '{}' is defined but never used", domain)
            }
            SchemaIssue::ZeroCardinalityDomain(domain) => {
                format!("Domain '{}' has zero cardinality", domain)
            }
            SchemaIssue::HighCardinalityDomain(domain, cardinality) => format!(
                "Domain '{}' has very high cardinality ({}), which may impact performance",
                domain, cardinality
            ),

            // Predicate-level issues.
            SchemaIssue::HighArityPredicate(predicate, arity) => format!(
                "Predicate '{}' has high arity ({}), consider decomposition",
                predicate, arity
            ),
        }
    }

    /// Returns the numeric severity of the issue, from `1` to `3`.
    ///
    /// `NoDomains` is `3`; `NoPredicates` and `ZeroCardinalityDomain` are `2`;
    /// every other variant is `1`.
    pub fn severity(&self) -> u8 {
        match self {
            SchemaIssue::NoDomains => 3,
            SchemaIssue::NoPredicates | SchemaIssue::ZeroCardinalityDomain(_) => 2,
            SchemaIssue::UnusedDomain(_)
            | SchemaIssue::HighCardinalityDomain(..)
            | SchemaIssue::HighArityPredicate(..) => 1,
        }
    }
}
234
235pub struct SchemaAnalyzer;
237
238impl SchemaAnalyzer {
239 pub fn analyze(table: &SymbolTable) -> SchemaRecommendations {
253 let mut issues = Vec::new();
254 let mut suggestions = Vec::new();
255
256 if table.domains.is_empty() {
258 issues.push(SchemaIssue::NoDomains);
259 suggestions.push("Define at least one domain for your schema".to_string());
260 return SchemaRecommendations {
261 issues,
262 suggestions,
263 };
264 }
265
266 if table.predicates.is_empty() {
268 issues.push(SchemaIssue::NoPredicates);
269 suggestions.push("Define predicates to enable reasoning over your domains".to_string());
270 }
271
272 let stats = SchemaStatistics::compute(table);
274 const HIGH_CARDINALITY_THRESHOLD: usize = 100_000;
275
276 for (name, domain) in &table.domains {
277 if domain.cardinality == 0 {
279 issues.push(SchemaIssue::ZeroCardinalityDomain(name.clone()));
280 }
281
282 if domain.cardinality > HIGH_CARDINALITY_THRESHOLD {
284 issues.push(SchemaIssue::HighCardinalityDomain(
285 name.clone(),
286 domain.cardinality,
287 ));
288 }
289
290 if stats.unused_domains.contains(name) {
292 issues.push(SchemaIssue::UnusedDomain(name.clone()));
293 suggestions.push(format!(
294 "Consider removing unused domain '{}' or defining predicates that use it",
295 name
296 ));
297 }
298 }
299
300 const HIGH_ARITY_THRESHOLD: usize = 5;
302 for (name, pred) in &table.predicates {
303 if pred.arg_domains.len() > HIGH_ARITY_THRESHOLD {
304 issues.push(SchemaIssue::HighArityPredicate(
305 name.clone(),
306 pred.arg_domains.len(),
307 ));
308 suggestions.push(format!(
309 "Consider decomposing high-arity predicate '{}' into smaller predicates",
310 name
311 ));
312 }
313 }
314
315 if stats.domain_count > 0 && stats.predicate_count == 0 {
317 suggestions
318 .push("Add predicates to establish relationships between your domains".to_string());
319 }
320
321 if stats.variable_count == 0 && stats.predicate_count > 0 {
322 suggestions
323 .push("Consider binding variables to enable quantification in rules".to_string());
324 }
325
326 SchemaRecommendations {
327 issues,
328 suggestions,
329 }
330 }
331}
332
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{DomainInfo, PredicateInfo};

    /// An empty table produces zeroed counts and zeroed numeric aggregates.
    #[test]
    fn test_statistics_empty_table() {
        let table = SymbolTable::new();
        let stats = SchemaStatistics::compute(&table);

        assert_eq!(stats.domain_count, 0);
        assert_eq!(stats.predicate_count, 0);
        assert_eq!(stats.variable_count, 0);

        // The cardinality aggregates fall back to zero rather than panicking.
        assert_eq!(stats.total_cardinality, 0);
        assert_eq!(stats.avg_cardinality, 0.0);
        assert_eq!(stats.max_cardinality, 0);
        assert_eq!(stats.min_cardinality, 0);
        assert!(stats.unused_domains.is_empty());
        assert_eq!(stats.complexity_score(), 0.0);
    }

    /// Counts, cardinality aggregates and per-argument usage on a small schema.
    #[test]
    fn test_statistics_with_data() {
        let mut table = SymbolTable::new();
        table.add_domain(DomainInfo::new("Person", 100)).unwrap();
        table.add_domain(DomainInfo::new("Location", 50)).unwrap();
        table
            .add_predicate(PredicateInfo::new(
                "knows",
                vec!["Person".into(), "Person".into()],
            ))
            .unwrap();
        table
            .add_predicate(PredicateInfo::new(
                "at",
                vec!["Person".into(), "Location".into()],
            ))
            .unwrap();

        let stats = SchemaStatistics::compute(&table);

        assert_eq!(stats.domain_count, 2);
        assert_eq!(stats.predicate_count, 2);
        assert_eq!(stats.total_cardinality, 150);
        assert_eq!(stats.avg_cardinality, 75.0);
        assert_eq!(stats.max_cardinality, 100);
        assert_eq!(stats.min_cardinality, 50);

        // Usage is counted per argument position: knows(Person, Person) + at(Person, _).
        assert_eq!(stats.domain_usage_frequency.get("Person"), Some(&3));
        assert_eq!(stats.domain_usage_frequency.get("Location"), Some(&1));
        assert!(stats.unused_domains.is_empty());
    }

    /// A domain that no predicate references is listed as unused.
    #[test]
    fn test_unused_domains() {
        let mut table = SymbolTable::new();
        table.add_domain(DomainInfo::new("Person", 100)).unwrap();
        table.add_domain(DomainInfo::new("Unused", 50)).unwrap();
        table
            .add_predicate(PredicateInfo::new("age", vec!["Person".into()]))
            .unwrap();

        let stats = SchemaStatistics::compute(&table);
        assert_eq!(stats.unused_domains, vec!["Unused"]);
    }

    /// Predicates are bucketed by their number of arguments.
    #[test]
    fn test_arity_distribution() {
        let mut table = SymbolTable::new();
        table.add_domain(DomainInfo::new("D", 10)).unwrap();
        table
            .add_predicate(PredicateInfo::new("p1", vec!["D".into()]))
            .unwrap();
        table
            .add_predicate(PredicateInfo::new("p2", vec!["D".into(), "D".into()]))
            .unwrap();
        table
            .add_predicate(PredicateInfo::new("p3", vec!["D".into(), "D".into()]))
            .unwrap();

        let stats = SchemaStatistics::compute(&table);
        assert_eq!(stats.arity_distribution.get(&1), Some(&1));
        assert_eq!(stats.arity_distribution.get(&2), Some(&2));
    }

    /// With no domains the analyzer reports `NoDomains` and nothing else.
    #[test]
    fn test_analyzer_no_domains() {
        let table = SymbolTable::new();
        let recs = SchemaAnalyzer::analyze(&table);

        assert!(!recs.issues.is_empty());
        assert!(recs.issues.contains(&SchemaIssue::NoDomains));

        // The analyzer returns early, so this is the only issue and suggestion.
        assert_eq!(recs.issues.len(), 1);
        assert_eq!(recs.suggestions.len(), 1);
    }

    /// Domains without any predicate trigger `NoPredicates`.
    #[test]
    fn test_analyzer_no_predicates() {
        let mut table = SymbolTable::new();
        table.add_domain(DomainInfo::new("Person", 10)).unwrap();

        let recs = SchemaAnalyzer::analyze(&table);
        assert!(recs.issues.contains(&SchemaIssue::NoPredicates));
        assert!(!recs.issues.contains(&SchemaIssue::NoDomains));
    }

    #[test]
    fn test_analyzer_zero_cardinality() {
        let mut table = SymbolTable::new();
        table.add_domain(DomainInfo::new("Person", 0)).unwrap();

        let recs = SchemaAnalyzer::analyze(&table);
        assert!(recs
            .issues
            .contains(&SchemaIssue::ZeroCardinalityDomain("Person".to_string())));
    }

    /// The high-cardinality threshold (100_000) is exclusive.
    #[test]
    fn test_analyzer_high_cardinality() {
        let mut table = SymbolTable::new();
        table.add_domain(DomainInfo::new("AtLimit", 100_000)).unwrap();
        table.add_domain(DomainInfo::new("Big", 100_001)).unwrap();

        let recs = SchemaAnalyzer::analyze(&table);
        assert!(recs
            .issues
            .contains(&SchemaIssue::HighCardinalityDomain("Big".to_string(), 100_001)));
        assert!(!recs
            .issues
            .contains(&SchemaIssue::HighCardinalityDomain("AtLimit".to_string(), 100_000)));
    }

    #[test]
    fn test_analyzer_unused_domain() {
        let mut table = SymbolTable::new();
        table.add_domain(DomainInfo::new("Used", 10)).unwrap();
        table.add_domain(DomainInfo::new("Unused", 10)).unwrap();
        table
            .add_predicate(PredicateInfo::new("p", vec!["Used".into()]))
            .unwrap();

        let recs = SchemaAnalyzer::analyze(&table);
        assert!(recs
            .issues
            .contains(&SchemaIssue::UnusedDomain("Unused".to_string())));
        assert!(!recs
            .issues
            .contains(&SchemaIssue::UnusedDomain("Used".to_string())));
    }

    #[test]
    fn test_analyzer_high_arity() {
        let mut table = SymbolTable::new();
        table.add_domain(DomainInfo::new("D", 10)).unwrap();
        // Ten arguments is well above the arity threshold of five.
        let args = vec!["D".to_string(); 10];
        table
            .add_predicate(PredicateInfo::new("complex", args))
            .unwrap();

        let recs = SchemaAnalyzer::analyze(&table);
        assert!(recs
            .issues
            .iter()
            .any(|i| matches!(i, SchemaIssue::HighArityPredicate(_, _))));
    }

    /// Severity levels and description texts of the individual issue variants.
    #[test]
    fn test_issue_severity_and_description() {
        let unused = SchemaIssue::UnusedDomain("D".to_string());
        let empty = SchemaIssue::ZeroCardinalityDomain("D".to_string());

        assert_eq!(unused.severity(), 1);
        assert_eq!(empty.severity(), 2);
        assert_eq!(SchemaIssue::NoPredicates.severity(), 2);
        assert_eq!(SchemaIssue::NoDomains.severity(), 3);

        assert_eq!(unused.description(), "Domain 'D' is defined but never used");
        assert_eq!(empty.description(), "Domain 'D' has zero cardinality");
        assert_eq!(
            SchemaIssue::HighArityPredicate("p".to_string(), 7).description(),
            "Predicate 'p' has high arity (7), consider decomposition"
        );
        assert_eq!(
            SchemaIssue::NoDomains.description(),
            "Schema has no domains defined"
        );
    }

    #[test]
    fn test_complexity_score() {
        let mut table = SymbolTable::new();
        table.add_domain(DomainInfo::new("Person", 100)).unwrap();
        table
            .add_predicate(PredicateInfo::new("p", vec!["Person".into()]))
            .unwrap();

        let stats = SchemaStatistics::compute(&table);
        let score = stats.complexity_score();
        assert!(score > 0.0);
    }

    #[test]
    fn test_most_used_domains() {
        let mut table = SymbolTable::new();
        table.add_domain(DomainInfo::new("A", 10)).unwrap();
        table.add_domain(DomainInfo::new("B", 10)).unwrap();
        table
            .add_predicate(PredicateInfo::new("p1", vec!["A".into(), "A".into()]))
            .unwrap();
        table
            .add_predicate(PredicateInfo::new("p2", vec!["B".into()]))
            .unwrap();

        let stats = SchemaStatistics::compute(&table);
        let most_used = stats.most_used_domains(1);
        assert_eq!(most_used[0].0, "A");
        assert_eq!(most_used[0].1, 2);
    }

    /// Mirror of `test_most_used_domains` for the ascending ranking, plus the
    /// truncation behaviour shared by both rankings.
    #[test]
    fn test_least_used_domains() {
        let mut table = SymbolTable::new();
        table.add_domain(DomainInfo::new("A", 10)).unwrap();
        table.add_domain(DomainInfo::new("B", 10)).unwrap();
        table
            .add_predicate(PredicateInfo::new("p1", vec!["A".into(), "A".into()]))
            .unwrap();
        table
            .add_predicate(PredicateInfo::new("p2", vec!["B".into()]))
            .unwrap();

        let stats = SchemaStatistics::compute(&table);
        let least_used = stats.least_used_domains(1);
        assert_eq!(least_used.len(), 1);
        assert_eq!(least_used[0].0, "B");
        assert_eq!(least_used[0].1, 1);

        // Asking for more entries than exist returns all of them; zero returns none.
        assert_eq!(stats.least_used_domains(10).len(), 2);
        assert_eq!(stats.most_used_domains(10).len(), 2);
        assert!(stats.most_used_domains(0).is_empty());
    }
}