use crate::core::Constraint;
use crate::prelude::TermError;
use std::collections::HashMap;
use std::sync::Arc;
/// Result of analyzing one constraint, as produced by
/// `QueryAnalyzer::analyze_constraint`.
#[derive(Debug, Clone)]
pub struct ConstraintAnalysis {
/// Caller-supplied name for this constraint; also the key under which
/// `QueryAnalyzer::analyze` caches the analysis.
pub name: String,
/// The constraint that was analyzed (shared handle, not a deep copy).
pub constraint: Arc<dyn Constraint>,
/// Table the constraint is taken to run against. `analyze_constraint`
/// currently always sets this to `"data"`.
pub table_name: String,
/// Aggregations selected from the value of `constraint.name()`.
pub aggregations: Vec<AggregationType>,
/// Column names reported for the constraint. The analyzer fills in fixed
/// placeholder names (`"column"`, `"column1"`, `"column2"`) chosen from the
/// constraint kind; it does not read them from the constraint itself.
pub columns: Vec<String>,
/// `true` when the constraint kind is `compliance`, `pattern_match` or
/// `containment`.
pub has_predicates: bool,
/// `false` when the constraint kind is `quantile`, `entropy`,
/// `mutual_information` or `anomaly_detection`; `true` otherwise.
pub is_combinable: bool,
}
/// Kinds of aggregation a constraint can be tagged with by
/// `QueryAnalyzer::analyze_constraint`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum AggregationType {
/// Assigned to `uniqueness` (with `CountDistinct`) and to every constraint
/// kind that has no more specific mapping.
Count,
/// Assigned to `uniqueness`, together with `Count`.
CountDistinct,
/// Assigned to `sum`.
Sum,
/// Assigned to `mean`.
Avg,
/// Assigned to `min`.
Min,
/// Assigned to `max`.
Max,
/// Assigned to `standard_deviation`.
StdDev,
/// Not assigned by any mapping visible in this file.
Variance,
}
/// Derives a `ConstraintAnalysis` for each constraint and remembers the
/// results between calls.
#[derive(Debug)]
pub struct QueryAnalyzer {
/// Analyses computed so far, keyed by the name passed to `analyze`.
/// Emptied by `clear_cache`.
cache: HashMap<String, ConstraintAnalysis>,
}
impl QueryAnalyzer {
    /// Creates an analyzer with an empty cache.
    pub fn new() -> Self {
        Self {
            cache: HashMap::new(),
        }
    }

    /// Analyzes every `(name, constraint)` pair and returns the analyses in
    /// input order.
    ///
    /// Results are cached by name. A cached entry is reused only when it was
    /// computed for the very same `Arc` allocation; if a name is reused with
    /// a different constraint, the constraint is analyzed again and the cache
    /// entry is replaced. The returned analysis therefore always carries the
    /// constraint that was passed in, never a stale one.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by [`Self::analyze_constraint`].
    pub fn analyze(
        &mut self,
        constraints: &[(String, Arc<dyn Constraint>)],
    ) -> Result<Vec<ConstraintAnalysis>, TermError> {
        let mut analyses = Vec::with_capacity(constraints.len());
        for (name, constraint) in constraints {
            // The name alone is not enough to trust a hit: the same name may
            // have been registered earlier for a different constraint.
            let cached = self
                .cache
                .get(name)
                .filter(|hit| Arc::ptr_eq(&hit.constraint, constraint));
            if let Some(hit) = cached {
                analyses.push(hit.clone());
                continue;
            }

            let analysis = self.analyze_constraint(name.clone(), Arc::clone(constraint))?;
            self.cache.insert(name.clone(), analysis.clone());
            analyses.push(analysis);
        }
        Ok(analyses)
    }

    /// Builds the [`ConstraintAnalysis`] for a single constraint.
    ///
    /// Every derived field is chosen from the string returned by
    /// `constraint.name()`; `table_name` is always `"data"`. This method
    /// neither reads nor updates the cache.
    ///
    /// # Errors
    ///
    /// The current implementation always returns `Ok`.
    pub fn analyze_constraint(
        &self,
        name: String,
        constraint: Arc<dyn Constraint>,
    ) -> Result<ConstraintAnalysis, TermError> {
        let constraint_name = constraint.name();

        let aggregations = match constraint_name {
            "uniqueness" => vec![AggregationType::Count, AggregationType::CountDistinct],
            "min" => vec![AggregationType::Min],
            "max" => vec![AggregationType::Max],
            "mean" => vec![AggregationType::Avg],
            "sum" => vec![AggregationType::Sum],
            "standard_deviation" => vec![AggregationType::StdDev],
            // Every other kind (completeness, compliance, quantile, entropy,
            // mutual_information, histogram and anything unrecognized) is
            // tagged with a plain count.
            _ => vec![AggregationType::Count],
        };

        let columns = self.extract_columns(constraint_name);

        let has_predicates = matches!(
            constraint_name,
            "compliance" | "pattern_match" | "containment"
        );

        let is_combinable = !matches!(
            constraint_name,
            "quantile" | "entropy" | "mutual_information" | "anomaly_detection"
        );

        Ok(ConstraintAnalysis {
            name,
            constraint,
            table_name: "data".to_string(),
            aggregations,
            columns,
            has_predicates,
            is_combinable,
        })
    }

    /// Returns placeholder column names for a constraint kind.
    ///
    /// The names are fixed strings chosen from `constraint_name`; nothing is
    /// read from the constraint itself.
    fn extract_columns(&self, constraint_name: &str) -> Vec<String> {
        // NOTE(review): `standard_deviation` is given an aggregation in
        // `analyze_constraint` but no column here, unlike min/max/mean/sum.
        // Confirm whether that omission is intended.
        match constraint_name {
            "completeness" | "uniqueness" | "min" | "max" | "mean" | "sum" => {
                vec!["column".to_string()]
            }
            "mutual_information" => {
                vec!["column1".to_string(), "column2".to_string()]
            }
            _ => vec![],
        }
    }

    /// Discards every cached analysis.
    pub fn clear_cache(&mut self) {
        self.cache.clear();
    }
}
impl Default for QueryAnalyzer {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A newly constructed analyzer holds no cached analyses.
    #[test]
    fn test_analyzer_creation() {
        assert!(QueryAnalyzer::new().cache.is_empty());
    }
}