1use std::collections::{HashMap, HashSet};
7
8use crate::SymbolTable;
9
10#[derive(Clone, Debug)]
12pub struct SchemaStatistics {
13 pub domain_count: usize,
15 pub predicate_count: usize,
17 pub variable_count: usize,
19 pub total_cardinality: usize,
21 pub avg_cardinality: f64,
23 pub max_cardinality: usize,
25 pub min_cardinality: usize,
27 pub arity_distribution: HashMap<usize, usize>,
29 pub domain_usage_frequency: HashMap<String, usize>,
31 pub unused_domains: Vec<String>,
33 pub predicates_by_arity: HashMap<usize, Vec<String>>,
35}
36
37impl SchemaStatistics {
38 pub fn compute(table: &SymbolTable) -> Self {
57 let domain_count = table.domains.len();
58 let predicate_count = table.predicates.len();
59 let variable_count = table.variables.len();
60
61 let cardinalities: Vec<usize> = table.domains.values().map(|d| d.cardinality).collect();
63 let total_cardinality: usize = cardinalities.iter().sum();
64 let avg_cardinality = if domain_count > 0 {
65 total_cardinality as f64 / domain_count as f64
66 } else {
67 0.0
68 };
69 let max_cardinality = cardinalities.iter().copied().max().unwrap_or(0);
70 let min_cardinality = cardinalities.iter().copied().min().unwrap_or(0);
71
72 let mut arity_distribution = HashMap::new();
74 let mut predicates_by_arity: HashMap<usize, Vec<String>> = HashMap::new();
75 for (name, pred) in &table.predicates {
76 let arity = pred.arg_domains.len();
77 *arity_distribution.entry(arity).or_insert(0) += 1;
78 predicates_by_arity
79 .entry(arity)
80 .or_default()
81 .push(name.clone());
82 }
83
84 let mut domain_usage_frequency = HashMap::new();
86 for pred in table.predicates.values() {
87 for domain in &pred.arg_domains {
88 *domain_usage_frequency.entry(domain.clone()).or_insert(0) += 1;
89 }
90 }
91
92 let used_domains: HashSet<_> = domain_usage_frequency.keys().cloned().collect();
94 let unused_domains: Vec<String> = table
95 .domains
96 .keys()
97 .filter(|d| !used_domains.contains(*d))
98 .cloned()
99 .collect();
100
101 Self {
102 domain_count,
103 predicate_count,
104 variable_count,
105 total_cardinality,
106 avg_cardinality,
107 max_cardinality,
108 min_cardinality,
109 arity_distribution,
110 domain_usage_frequency,
111 unused_domains,
112 predicates_by_arity,
113 }
114 }
115
116 pub fn most_used_domains(&self, n: usize) -> Vec<(String, usize)> {
118 let mut usage: Vec<_> = self.domain_usage_frequency.iter().collect();
119 usage.sort_by(|a, b| b.1.cmp(a.1));
120 usage
121 .into_iter()
122 .take(n)
123 .map(|(d, &count)| (d.clone(), count))
124 .collect()
125 }
126
127 pub fn least_used_domains(&self, n: usize) -> Vec<(String, usize)> {
129 let mut usage: Vec<_> = self.domain_usage_frequency.iter().collect();
130 usage.sort_by(|a, b| a.1.cmp(b.1));
131 usage
132 .into_iter()
133 .take(n)
134 .map(|(d, &count)| (d.clone(), count))
135 .collect()
136 }
137
138 pub fn complexity_score(&self) -> f64 {
146 let domain_factor = self.domain_count as f64;
147 let predicate_factor = self.predicate_count as f64;
148 let arity_diversity = self.arity_distribution.len() as f64;
149 let usage_variance = self.compute_usage_variance();
150
151 domain_factor * 0.2 + predicate_factor * 0.3 + arity_diversity * 0.2 + usage_variance * 0.3
153 }
154
155 fn compute_usage_variance(&self) -> f64 {
156 if self.domain_usage_frequency.is_empty() {
157 return 0.0;
158 }
159
160 let counts: Vec<f64> = self
161 .domain_usage_frequency
162 .values()
163 .map(|&c| c as f64)
164 .collect();
165 let mean = counts.iter().sum::<f64>() / counts.len() as f64;
166 let variance = counts.iter().map(|c| (c - mean).powi(2)).sum::<f64>() / counts.len() as f64;
167 variance.sqrt()
168 }
169}
170
171#[derive(Clone, Debug)]
173pub struct SchemaRecommendations {
174 pub issues: Vec<SchemaIssue>,
176 pub suggestions: Vec<String>,
178}
179
/// A single problem detected in a schema.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SchemaIssue {
    /// The named domain is defined but never used.
    UnusedDomain(String),
    /// The named domain has a cardinality of zero.
    ZeroCardinalityDomain(String),
    /// The named domain has a very high cardinality (name, cardinality).
    HighCardinalityDomain(String, usize),
    /// The named predicate has a high arity (name, arity).
    HighArityPredicate(String, usize),
    /// The schema defines no predicates.
    NoPredicates,
    /// The schema defines no domains.
    NoDomains,
}

impl SchemaIssue {
    /// Returns a human-readable, single-sentence description of the issue.
    pub fn description(&self) -> String {
        match self {
            Self::NoDomains => String::from("Schema has no domains defined"),
            Self::NoPredicates => String::from("Schema has no predicates defined"),
            Self::UnusedDomain(domain) => {
                format!("Domain '{}' is defined but never used", domain)
            }
            Self::ZeroCardinalityDomain(domain) => {
                format!("Domain '{}' has zero cardinality", domain)
            }
            Self::HighCardinalityDomain(domain, cardinality) => format!(
                "Domain '{}' has very high cardinality ({}), which may impact performance",
                domain, cardinality
            ),
            Self::HighArityPredicate(predicate, arity) => format!(
                "Predicate '{}' has high arity ({}), consider decomposition",
                predicate, arity
            ),
        }
    }

    /// Returns the numeric severity level of the issue, from 1 to 3.
    ///
    /// `NoDomains` is 3; `ZeroCardinalityDomain` and `NoPredicates` are 2;
    /// every other issue is 1.
    pub fn severity(&self) -> u8 {
        match self {
            Self::NoDomains => 3,
            Self::ZeroCardinalityDomain(_) | Self::NoPredicates => 2,
            Self::UnusedDomain(_)
            | Self::HighCardinalityDomain(..)
            | Self::HighArityPredicate(..) => 1,
        }
    }
}
234
235pub struct SchemaAnalyzer;
237
238impl SchemaAnalyzer {
239 pub fn analyze(table: &SymbolTable) -> SchemaRecommendations {
253 let mut issues = Vec::new();
254 let mut suggestions = Vec::new();
255
256 if table.domains.is_empty() {
258 issues.push(SchemaIssue::NoDomains);
259 suggestions.push("Define at least one domain for your schema".to_string());
260 return SchemaRecommendations {
261 issues,
262 suggestions,
263 };
264 }
265
266 if table.predicates.is_empty() {
268 issues.push(SchemaIssue::NoPredicates);
269 suggestions.push("Define predicates to enable reasoning over your domains".to_string());
270 }
271
272 let stats = SchemaStatistics::compute(table);
274 const HIGH_CARDINALITY_THRESHOLD: usize = 100_000;
275
276 for (name, domain) in &table.domains {
277 if domain.cardinality == 0 {
279 issues.push(SchemaIssue::ZeroCardinalityDomain(name.clone()));
280 }
281
282 if domain.cardinality > HIGH_CARDINALITY_THRESHOLD {
284 issues.push(SchemaIssue::HighCardinalityDomain(
285 name.clone(),
286 domain.cardinality,
287 ));
288 }
289
290 if stats.unused_domains.contains(name) {
292 issues.push(SchemaIssue::UnusedDomain(name.clone()));
293 suggestions.push(format!(
294 "Consider removing unused domain '{}' or defining predicates that use it",
295 name
296 ));
297 }
298 }
299
300 const HIGH_ARITY_THRESHOLD: usize = 5;
302 for (name, pred) in &table.predicates {
303 if pred.arg_domains.len() > HIGH_ARITY_THRESHOLD {
304 issues.push(SchemaIssue::HighArityPredicate(
305 name.clone(),
306 pred.arg_domains.len(),
307 ));
308 suggestions.push(format!(
309 "Consider decomposing high-arity predicate '{}' into smaller predicates",
310 name
311 ));
312 }
313 }
314
315 if stats.domain_count > 0 && stats.predicate_count == 0 {
317 suggestions
318 .push("Add predicates to establish relationships between your domains".to_string());
319 }
320
321 if stats.variable_count == 0 && stats.predicate_count > 0 {
322 suggestions
323 .push("Consider binding variables to enable quantification in rules".to_string());
324 }
325
326 SchemaRecommendations {
327 issues,
328 suggestions,
329 }
330 }
331}
332
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{DomainInfo, PredicateInfo};

    /// Adds a domain to `table`, failing the test with context if it is rejected.
    fn define_domain(table: &mut SymbolTable, name: &str, cardinality: usize) {
        table
            .add_domain(DomainInfo::new(name, cardinality))
            .expect("test domain should be accepted by the symbol table");
    }

    /// Adds a predicate over `arg_domains` to `table`, failing the test with
    /// context if it is rejected.
    fn define_predicate(table: &mut SymbolTable, name: &str, arg_domains: &[&str]) {
        let args: Vec<String> = arg_domains.iter().map(|&d| String::from(d)).collect();
        table
            .add_predicate(PredicateInfo::new(name, args))
            .expect("test predicate should be accepted by the symbol table");
    }

    #[test]
    fn test_statistics_empty_table() {
        let table = SymbolTable::new();
        let stats = SchemaStatistics::compute(&table);

        assert_eq!(stats.domain_count, 0);
        assert_eq!(stats.predicate_count, 0);
        assert_eq!(stats.variable_count, 0);

        // Cardinality aggregates fall back to zero rather than panicking.
        assert_eq!(stats.total_cardinality, 0);
        assert_eq!(stats.avg_cardinality, 0.0);
        assert_eq!(stats.max_cardinality, 0);
        assert_eq!(stats.min_cardinality, 0);
        assert!(stats.unused_domains.is_empty());
    }

    #[test]
    fn test_statistics_with_data() {
        let mut table = SymbolTable::new();
        define_domain(&mut table, "Person", 100);
        define_domain(&mut table, "Location", 50);
        define_predicate(&mut table, "knows", &["Person", "Person"]);
        define_predicate(&mut table, "at", &["Person", "Location"]);

        let stats = SchemaStatistics::compute(&table);

        assert_eq!(stats.domain_count, 2);
        assert_eq!(stats.predicate_count, 2);
        assert_eq!(stats.total_cardinality, 150);
        assert_eq!(stats.avg_cardinality, 75.0);
        assert_eq!(stats.max_cardinality, 100);
        assert_eq!(stats.min_cardinality, 50);

        // "Person" fills two slots of `knows` and one slot of `at`.
        assert_eq!(stats.domain_usage_frequency.get("Person"), Some(&3));
        assert_eq!(stats.domain_usage_frequency.get("Location"), Some(&1));
        assert!(stats.unused_domains.is_empty());
    }

    #[test]
    fn test_unused_domains() {
        let mut table = SymbolTable::new();
        define_domain(&mut table, "Person", 100);
        define_domain(&mut table, "Unused", 50);
        define_predicate(&mut table, "age", &["Person"]);

        let stats = SchemaStatistics::compute(&table);
        assert_eq!(stats.unused_domains, vec!["Unused"]);
    }

    #[test]
    fn test_arity_distribution() {
        let mut table = SymbolTable::new();
        define_domain(&mut table, "D", 10);
        define_predicate(&mut table, "p1", &["D"]);
        define_predicate(&mut table, "p2", &["D", "D"]);
        define_predicate(&mut table, "p3", &["D", "D"]);

        let stats = SchemaStatistics::compute(&table);
        assert_eq!(stats.arity_distribution.get(&1), Some(&1));
        assert_eq!(stats.arity_distribution.get(&2), Some(&2));
    }

    #[test]
    fn test_analyzer_no_domains() {
        let table = SymbolTable::new();
        let recs = SchemaAnalyzer::analyze(&table);

        assert!(!recs.issues.is_empty());
        assert!(recs.issues.contains(&SchemaIssue::NoDomains));
    }

    #[test]
    fn test_analyzer_zero_cardinality() {
        let mut table = SymbolTable::new();
        define_domain(&mut table, "Person", 0);

        let recs = SchemaAnalyzer::analyze(&table);
        assert!(recs
            .issues
            .contains(&SchemaIssue::ZeroCardinalityDomain("Person".to_string())));
    }

    #[test]
    fn test_analyzer_unused_domain() {
        let mut table = SymbolTable::new();
        define_domain(&mut table, "Used", 10);
        define_domain(&mut table, "Unused", 10);
        define_predicate(&mut table, "p", &["Used"]);

        let recs = SchemaAnalyzer::analyze(&table);
        assert!(recs
            .issues
            .contains(&SchemaIssue::UnusedDomain("Unused".to_string())));
    }

    #[test]
    fn test_analyzer_high_arity() {
        let mut table = SymbolTable::new();
        define_domain(&mut table, "D", 10);
        // Ten arguments is well above the analyzer's high-arity threshold.
        define_predicate(&mut table, "complex", &["D"; 10]);

        let recs = SchemaAnalyzer::analyze(&table);
        assert!(recs
            .issues
            .iter()
            .any(|i| matches!(i, SchemaIssue::HighArityPredicate(_, _))));
    }

    #[test]
    fn test_complexity_score() {
        let mut table = SymbolTable::new();
        define_domain(&mut table, "Person", 100);
        define_predicate(&mut table, "p", &["Person"]);

        let stats = SchemaStatistics::compute(&table);
        let score = stats.complexity_score();
        assert!(score > 0.0);
        // 1 domain * 0.2 + 1 predicate * 0.3 + 1 distinct arity * 0.2, and a
        // single usage count has no spread.
        assert!((score - 0.7).abs() < 1e-9);
    }

    #[test]
    fn test_most_used_domains() {
        let mut table = SymbolTable::new();
        define_domain(&mut table, "A", 10);
        define_domain(&mut table, "B", 10);
        define_predicate(&mut table, "p1", &["A", "A"]);
        define_predicate(&mut table, "p2", &["B"]);

        let stats = SchemaStatistics::compute(&table);
        let most_used = stats.most_used_domains(1);
        assert_eq!(most_used[0].0, "A");
        assert_eq!(most_used[0].1, 2);

        let least_used = stats.least_used_domains(1);
        assert_eq!(least_used, vec![("B".to_string(), 1)]);
    }
}