use std::collections::{HashMap, HashSet};
use crate::SymbolTable;
/// Aggregate statistics describing the contents of a [`SymbolTable`].
///
/// Produced by [`SchemaStatistics::compute`]. Every field is computed once from the
/// table handed to `compute`; the value is a snapshot and does not track later changes.
#[derive(Clone, Debug)]
pub struct SchemaStatistics {
    /// Number of entries in the table's `domains` map.
    pub domain_count: usize,
    /// Number of entries in the table's `predicates` map.
    pub predicate_count: usize,
    /// Number of entries in the table's `variables` map.
    pub variable_count: usize,
    /// Sum of all domain cardinalities, saturating at `usize::MAX`.
    pub total_cardinality: usize,
    /// `total_cardinality / domain_count`, or `0.0` when there are no domains.
    pub avg_cardinality: f64,
    /// Largest domain cardinality, or `0` when there are no domains.
    pub max_cardinality: usize,
    /// Smallest domain cardinality, or `0` when there are no domains.
    pub min_cardinality: usize,
    /// Maps an arity (number of argument domains) to how many predicates have it.
    pub arity_distribution: HashMap<usize, usize>,
    /// Maps a domain name to the number of predicate argument positions naming it.
    pub domain_usage_frequency: HashMap<String, usize>,
    /// Names of domains no predicate argument refers to, in table iteration order.
    pub unused_domains: Vec<String>,
    /// Maps an arity to the names of the predicates having it, in table iteration order.
    pub predicates_by_arity: HashMap<usize, Vec<String>>,
}

impl SchemaStatistics {
    /// Computes all statistics for `table`.
    ///
    /// An empty table yields zero for every count and cardinality figure.
    pub fn compute(table: &SymbolTable) -> Self {
        let domain_count = table.domains.len();
        let predicate_count = table.predicates.len();
        let variable_count = table.variables.len();

        // Saturate instead of overflowing: a plain `sum()` panics in debug builds and
        // silently wraps in release builds when cardinalities are huge.
        let total_cardinality = table
            .domains
            .values()
            .fold(0usize, |sum, d| sum.saturating_add(d.cardinality));
        let avg_cardinality = if domain_count == 0 {
            0.0
        } else {
            total_cardinality as f64 / domain_count as f64
        };
        let max_cardinality = table
            .domains
            .values()
            .map(|d| d.cardinality)
            .max()
            .unwrap_or(0);
        let min_cardinality = table
            .domains
            .values()
            .map(|d| d.cardinality)
            .min()
            .unwrap_or(0);

        // One pass over the predicates feeds all three predicate-derived maps.
        let mut arity_distribution: HashMap<usize, usize> = HashMap::new();
        let mut predicates_by_arity: HashMap<usize, Vec<String>> = HashMap::new();
        let mut domain_usage_frequency: HashMap<String, usize> = HashMap::new();
        for (name, pred) in &table.predicates {
            let arity = pred.arg_domains.len();
            *arity_distribution.entry(arity).or_insert(0) += 1;
            predicates_by_arity
                .entry(arity)
                .or_default()
                .push(name.clone());
            for domain in &pred.arg_domains {
                *domain_usage_frequency.entry(domain.clone()).or_insert(0) += 1;
            }
        }

        // A domain is unused when no predicate argument mentions its name. The set
        // only borrows the names, so nothing is cloned except the final result.
        let unused_domains: Vec<String> = {
            let used: HashSet<&str> = domain_usage_frequency
                .keys()
                .map(String::as_str)
                .collect();
            table
                .domains
                .keys()
                .filter(|name| !used.contains(name.as_str()))
                .cloned()
                .collect()
        };

        Self {
            domain_count,
            predicate_count,
            variable_count,
            total_cardinality,
            avg_cardinality,
            max_cardinality,
            min_cardinality,
            arity_distribution,
            domain_usage_frequency,
            unused_domains,
            predicates_by_arity,
        }
    }

    /// Returns up to `n` `(domain, usage count)` pairs, highest count first.
    ///
    /// Domains with equal counts are ordered by name so the result is deterministic.
    /// Only domains that appear in `domain_usage_frequency` are considered.
    pub fn most_used_domains(&self, n: usize) -> Vec<(String, usize)> {
        self.ranked_domains(n, true)
    }

    /// Returns up to `n` `(domain, usage count)` pairs, lowest count first.
    ///
    /// Domains with equal counts are ordered by name so the result is deterministic.
    /// Domains that are never used have no entry in `domain_usage_frequency` and
    /// therefore never appear here; see `unused_domains` for those.
    pub fn least_used_domains(&self, n: usize) -> Vec<(String, usize)> {
        self.ranked_domains(n, false)
    }

    /// Weighted sum of domain count (0.2), predicate count (0.3), number of distinct
    /// arities (0.2) and the spread of domain usage counts (0.3).
    pub fn complexity_score(&self) -> f64 {
        let domain_factor = self.domain_count as f64;
        let predicate_factor = self.predicate_count as f64;
        let arity_diversity = self.arity_distribution.len() as f64;
        let usage_variance = self.compute_usage_variance();
        domain_factor * 0.2 + predicate_factor * 0.3 + arity_diversity * 0.2 + usage_variance * 0.3
    }

    /// Shared implementation of the most/least-used rankings.
    ///
    /// Sorts by count (descending when `descending` is true) and breaks ties by
    /// ascending domain name; without the tie-break the order of equal counts would
    /// follow `HashMap` iteration order, which differs from run to run.
    fn ranked_domains(&self, n: usize, descending: bool) -> Vec<(String, usize)> {
        let mut usage: Vec<(&str, usize)> = self
            .domain_usage_frequency
            .iter()
            .map(|(domain, &count)| (domain.as_str(), count))
            .collect();
        // Names are unique map keys, so the comparison is a total order and an
        // unstable sort cannot introduce nondeterminism.
        usage.sort_unstable_by(|a, b| {
            let by_count = if descending {
                b.1.cmp(&a.1)
            } else {
                a.1.cmp(&b.1)
            };
            by_count.then_with(|| a.0.cmp(b.0))
        });
        usage
            .into_iter()
            .take(n)
            .map(|(domain, count)| (domain.to_owned(), count))
            .collect()
    }

    /// Spread of the per-domain usage counts.
    ///
    /// Despite the name, the value returned is the population standard deviation
    /// (square root of the population variance). Returns `0.0` when no domain is used.
    fn compute_usage_variance(&self) -> f64 {
        if self.domain_usage_frequency.is_empty() {
            return 0.0;
        }
        let counts: Vec<f64> = self
            .domain_usage_frequency
            .values()
            .map(|&c| c as f64)
            .collect();
        let len = counts.len() as f64;
        let mean = counts.iter().sum::<f64>() / len;
        let variance = counts.iter().map(|c| (c - mean).powi(2)).sum::<f64>() / len;
        variance.sqrt()
    }
}
/// Outcome of a schema analysis: the issues that were detected together with
/// free-form advice. This is the value returned by `SchemaAnalyzer::analyze`.
#[derive(Clone, Debug)]
pub struct SchemaRecommendations {
/// Problems found in the schema, in the order the analyzer recorded them.
pub issues: Vec<SchemaIssue>,
/// Human-readable suggestions. These are not one-to-one with `issues`: some
/// issues are recorded without a suggestion and some suggestions are added
/// without a matching issue.
pub suggestions: Vec<String>,
}
/// A single problem that can be reported about a schema.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum SchemaIssue {
    /// The named domain is defined but not referenced.
    UnusedDomain(String),
    /// The named domain has a cardinality of zero.
    ZeroCardinalityDomain(String),
    /// The named domain has the given, very high, cardinality.
    HighCardinalityDomain(String, usize),
    /// The named predicate has the given, high, arity.
    HighArityPredicate(String, usize),
    /// The schema defines no predicates.
    NoPredicates,
    /// The schema defines no domains.
    NoDomains,
}

impl SchemaIssue {
    /// Renders the issue as a one-line, human-readable message.
    pub fn description(&self) -> String {
        match self {
            // Schema-wide issues carry no payload, so their text is fixed.
            Self::NoDomains => String::from("Schema has no domains defined"),
            Self::NoPredicates => String::from("Schema has no predicates defined"),
            Self::UnusedDomain(domain) => {
                format!("Domain '{}' is defined but never used", domain)
            }
            Self::ZeroCardinalityDomain(domain) => {
                format!("Domain '{}' has zero cardinality", domain)
            }
            Self::HighCardinalityDomain(domain, cardinality) => format!(
                "Domain '{}' has very high cardinality ({}), which may impact performance",
                domain, cardinality
            ),
            Self::HighArityPredicate(predicate, arity) => format!(
                "Predicate '{}' has high arity ({}), consider decomposition",
                predicate, arity
            ),
        }
    }

    /// Numeric severity level of the issue, from `1` to `3`.
    ///
    /// `NoDomains` is assigned the largest value (`3`); presumably a larger number
    /// means a more serious problem.
    pub fn severity(&self) -> u8 {
        match self {
            Self::NoDomains => 3,
            Self::ZeroCardinalityDomain(..) | Self::NoPredicates => 2,
            Self::UnusedDomain(..)
            | Self::HighCardinalityDomain(..)
            | Self::HighArityPredicate(..) => 1,
        }
    }
}
/// Stateless entry point for schema analysis; see [`SchemaAnalyzer::analyze`].
#[derive(Clone, Copy, Debug, Default)]
pub struct SchemaAnalyzer;

impl SchemaAnalyzer {
    /// Inspects `table` and reports detected issues together with suggestions.
    ///
    /// Checks performed:
    /// - no domains at all (reported alone; nothing else is checked in that case),
    /// - no predicates,
    /// - per domain: zero cardinality, cardinality above 100 000, not used by any predicate,
    /// - per predicate: more than 5 argument domains,
    /// - predicates present but no variables bound (suggestion only).
    ///
    /// Issues and suggestions are appended in table iteration order.
    pub fn analyze(table: &SymbolTable) -> SchemaRecommendations {
        // Both thresholds are exclusive: a value equal to the threshold is not flagged.
        const HIGH_CARDINALITY_THRESHOLD: usize = 100_000;
        const HIGH_ARITY_THRESHOLD: usize = 5;

        let mut issues = Vec::new();
        let mut suggestions = Vec::new();

        if table.domains.is_empty() {
            issues.push(SchemaIssue::NoDomains);
            suggestions.push("Define at least one domain for your schema".to_string());
            return SchemaRecommendations {
                issues,
                suggestions,
            };
        }

        if table.predicates.is_empty() {
            issues.push(SchemaIssue::NoPredicates);
            suggestions.push("Define predicates to enable reasoning over your domains".to_string());
        }

        let stats = SchemaStatistics::compute(table);
        // Build the lookup set once; scanning the `Vec` for every domain would make
        // this loop quadratic in the number of domains.
        let unused: HashSet<&str> = stats.unused_domains.iter().map(String::as_str).collect();

        for (name, domain) in &table.domains {
            if domain.cardinality == 0 {
                issues.push(SchemaIssue::ZeroCardinalityDomain(name.clone()));
            }
            if domain.cardinality > HIGH_CARDINALITY_THRESHOLD {
                issues.push(SchemaIssue::HighCardinalityDomain(
                    name.clone(),
                    domain.cardinality,
                ));
            }
            if unused.contains(name.as_str()) {
                issues.push(SchemaIssue::UnusedDomain(name.clone()));
                suggestions.push(format!(
                    "Consider removing unused domain '{}' or defining predicates that use it",
                    name
                ));
            }
        }

        for (name, pred) in &table.predicates {
            let arity = pred.arg_domains.len();
            if arity > HIGH_ARITY_THRESHOLD {
                issues.push(SchemaIssue::HighArityPredicate(name.clone(), arity));
                suggestions.push(format!(
                    "Consider decomposing high-arity predicate '{}' into smaller predicates",
                    name
                ));
            }
        }

        // NOTE: this overlaps with the "Define predicates ..." suggestion above; both
        // are emitted so that the produced output stays unchanged for existing callers.
        if stats.domain_count > 0 && stats.predicate_count == 0 {
            suggestions
                .push("Add predicates to establish relationships between your domains".to_string());
        }
        if stats.variable_count == 0 && stats.predicate_count > 0 {
            suggestions
                .push("Consider binding variables to enable quantification in rules".to_string());
        }

        SchemaRecommendations {
            issues,
            suggestions,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{DomainInfo, PredicateInfo};

    /// Builds a table holding the given `(name, cardinality)` domains and nothing else.
    fn table_with_domains(domains: &[(&str, usize)]) -> SymbolTable {
        let mut table = SymbolTable::new();
        for &(name, cardinality) in domains {
            table
                .add_domain(DomainInfo::new(name, cardinality))
                .expect("test domain should be accepted by the symbol table");
        }
        table
    }

    /// Adds a predicate called `name` whose arguments range over `arg_domains`.
    fn add_predicate(table: &mut SymbolTable, name: &str, arg_domains: &[&str]) {
        let args: Vec<String> = arg_domains.iter().map(|d| d.to_string()).collect();
        table
            .add_predicate(PredicateInfo::new(name, args))
            .expect("test predicate should be accepted by the symbol table");
    }

    #[test]
    fn test_statistics_empty_table() {
        let table = SymbolTable::new();
        let stats = SchemaStatistics::compute(&table);
        assert_eq!(stats.domain_count, 0);
        assert_eq!(stats.predicate_count, 0);
        assert_eq!(stats.variable_count, 0);
        // With no domains every cardinality figure falls back to zero.
        assert_eq!(stats.total_cardinality, 0);
        assert_eq!(stats.avg_cardinality, 0.0);
        assert_eq!(stats.max_cardinality, 0);
        assert_eq!(stats.min_cardinality, 0);
        assert!(stats.unused_domains.is_empty());
    }

    #[test]
    fn test_statistics_with_data() {
        let mut table = table_with_domains(&[("Person", 100), ("Location", 50)]);
        add_predicate(&mut table, "knows", &["Person", "Person"]);
        add_predicate(&mut table, "at", &["Person", "Location"]);
        let stats = SchemaStatistics::compute(&table);
        assert_eq!(stats.domain_count, 2);
        assert_eq!(stats.predicate_count, 2);
        assert_eq!(stats.total_cardinality, 150);
        assert_eq!(stats.avg_cardinality, 75.0);
        assert_eq!(stats.max_cardinality, 100);
        assert_eq!(stats.min_cardinality, 50);
        assert_eq!(stats.domain_usage_frequency.get("Person"), Some(&3));
        assert_eq!(stats.domain_usage_frequency.get("Location"), Some(&1));
        assert!(stats.unused_domains.is_empty());
    }

    #[test]
    fn test_unused_domains() {
        let mut table = table_with_domains(&[("Person", 100), ("Unused", 50)]);
        add_predicate(&mut table, "age", &["Person"]);
        let stats = SchemaStatistics::compute(&table);
        assert_eq!(stats.unused_domains, vec!["Unused"]);
    }

    #[test]
    fn test_arity_distribution() {
        let mut table = table_with_domains(&[("D", 10)]);
        add_predicate(&mut table, "p1", &["D"]);
        add_predicate(&mut table, "p2", &["D", "D"]);
        add_predicate(&mut table, "p3", &["D", "D"]);
        let stats = SchemaStatistics::compute(&table);
        assert_eq!(stats.arity_distribution.get(&1), Some(&1));
        assert_eq!(stats.arity_distribution.get(&2), Some(&2));
    }

    #[test]
    fn test_analyzer_no_domains() {
        let table = SymbolTable::new();
        let recs = SchemaAnalyzer::analyze(&table);
        assert!(!recs.issues.is_empty());
        assert!(recs.issues.contains(&SchemaIssue::NoDomains));
    }

    #[test]
    fn test_analyzer_no_predicates() {
        let table = table_with_domains(&[("Person", 10)]);
        let recs = SchemaAnalyzer::analyze(&table);
        assert!(recs.issues.contains(&SchemaIssue::NoPredicates));
        assert!(!recs.issues.contains(&SchemaIssue::NoDomains));
    }

    #[test]
    fn test_analyzer_zero_cardinality() {
        let table = table_with_domains(&[("Person", 0)]);
        let recs = SchemaAnalyzer::analyze(&table);
        assert!(recs
            .issues
            .contains(&SchemaIssue::ZeroCardinalityDomain("Person".to_string())));
    }

    #[test]
    fn test_analyzer_unused_domain() {
        let mut table = table_with_domains(&[("Used", 10), ("Unused", 10)]);
        add_predicate(&mut table, "p", &["Used"]);
        let recs = SchemaAnalyzer::analyze(&table);
        assert!(recs
            .issues
            .contains(&SchemaIssue::UnusedDomain("Unused".to_string())));
        assert!(!recs
            .issues
            .contains(&SchemaIssue::UnusedDomain("Used".to_string())));
    }

    #[test]
    fn test_analyzer_high_arity() {
        let mut table = table_with_domains(&[("D", 10)]);
        add_predicate(&mut table, "complex", &["D"; 10]);
        let recs = SchemaAnalyzer::analyze(&table);
        assert!(recs
            .issues
            .contains(&SchemaIssue::HighArityPredicate("complex".to_string(), 10)));
    }

    #[test]
    fn test_complexity_score() {
        let mut table = table_with_domains(&[("Person", 100)]);
        add_predicate(&mut table, "p", &["Person"]);
        let stats = SchemaStatistics::compute(&table);
        let score = stats.complexity_score();
        assert!(score > 0.0);
    }

    #[test]
    fn test_most_used_domains() {
        let mut table = table_with_domains(&[("A", 10), ("B", 10)]);
        add_predicate(&mut table, "p1", &["A", "A"]);
        add_predicate(&mut table, "p2", &["B"]);
        let stats = SchemaStatistics::compute(&table);
        let most_used = stats.most_used_domains(1);
        assert_eq!(most_used[0].0, "A");
        assert_eq!(most_used[0].1, 2);
    }

    #[test]
    fn test_least_used_domains() {
        let mut table = table_with_domains(&[("A", 10), ("B", 10)]);
        add_predicate(&mut table, "p1", &["A", "A"]);
        add_predicate(&mut table, "p2", &["B"]);
        let stats = SchemaStatistics::compute(&table);
        // Counts differ (A = 2, B = 1), so the result does not depend on tie order.
        assert_eq!(stats.least_used_domains(1), vec![("B".to_string(), 1)]);
    }
}