term-guard 0.0.2

A Rust data validation library providing Deequ-like capabilities without Spark dependencies
Documentation
//! Query analysis for constraint optimization.

use crate::core::Constraint;
use crate::prelude::TermError;
use std::collections::HashMap;
use std::sync::Arc;

/// Information about a constraint's query pattern.
#[derive(Debug, Clone)]
pub struct ConstraintAnalysis {
    /// The constraint name
    pub name: String,
    /// The constraint reference
    pub constraint: Arc<dyn Constraint>,
    /// The table being queried (usually "data")
    pub table_name: String,
    /// Type of aggregations used (COUNT, SUM, etc.)
    pub aggregations: Vec<AggregationType>,
    /// Columns referenced in the query
    pub columns: Vec<String>,
    /// Whether the query has WHERE clauses
    pub has_predicates: bool,
    /// Whether the query can be combined with others
    pub is_combinable: bool,
}

/// Types of aggregations used in constraints.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum AggregationType {
    Count,
    CountDistinct,
    Sum,
    Avg,
    Min,
    Max,
    StdDev,
    Variance,
}

/// Analyzes constraints to identify optimization opportunities.
#[derive(Debug)]
pub struct QueryAnalyzer {
    /// Cache of analysis results
    cache: HashMap<String, ConstraintAnalysis>,
}

impl QueryAnalyzer {
    /// Creates a new query analyzer.
    pub fn new() -> Self {
        Self {
            cache: HashMap::new(),
        }
    }

    /// Analyzes a set of constraints.
    pub fn analyze(
        &mut self,
        constraints: &[(String, Arc<dyn Constraint>)],
    ) -> Result<Vec<ConstraintAnalysis>, TermError> {
        let mut analyses = Vec::new();

        for (name, constraint) in constraints {
            // Check cache first
            if let Some(cached) = self.cache.get(name) {
                analyses.push(cached.clone());
                continue;
            }

            // Analyze the constraint
            let analysis = self.analyze_constraint(name.clone(), constraint.clone())?;

            // Cache the result
            self.cache.insert(name.clone(), analysis.clone());
            analyses.push(analysis);
        }

        Ok(analyses)
    }

    /// Analyzes a single constraint.
    pub fn analyze_constraint(
        &self,
        name: String,
        constraint: Arc<dyn Constraint>,
    ) -> Result<ConstraintAnalysis, TermError> {
        // For now, we'll use heuristics based on constraint names
        // In a real implementation, we'd parse the SQL or use constraint metadata

        let constraint_name = constraint.name();

        // Determine aggregations based on constraint type
        let aggregations = match constraint_name {
            "completeness" => vec![AggregationType::Count],
            "uniqueness" => vec![AggregationType::Count, AggregationType::CountDistinct],
            "compliance" => vec![AggregationType::Count],
            "min" => vec![AggregationType::Min],
            "max" => vec![AggregationType::Max],
            "mean" => vec![AggregationType::Avg],
            "sum" => vec![AggregationType::Sum],
            "standard_deviation" => vec![AggregationType::StdDev],
            "quantile" => vec![AggregationType::Count], // Simplified
            "entropy" => vec![AggregationType::Count],  // Simplified
            "mutual_information" => vec![AggregationType::Count], // Simplified
            "histogram" => vec![AggregationType::Count],
            _ => vec![AggregationType::Count], // Default
        };

        // Extract column information (simplified for now)
        let columns = self.extract_columns(constraint_name);

        // Determine if query has predicates
        let has_predicates = matches!(
            constraint_name,
            "compliance" | "pattern_match" | "containment"
        );

        // Most constraints can be combined except for complex ones
        let is_combinable = !matches!(
            constraint_name,
            "quantile" | "entropy" | "mutual_information" | "anomaly_detection"
        );

        Ok(ConstraintAnalysis {
            name,
            constraint,
            table_name: "data".to_string(),
            aggregations,
            columns,
            has_predicates,
            is_combinable,
        })
    }

    /// Extracts column names from constraint (simplified heuristic).
    fn extract_columns(&self, constraint_name: &str) -> Vec<String> {
        // In a real implementation, we'd parse the constraint's configuration
        // For now, return a placeholder
        match constraint_name {
            "completeness" | "uniqueness" | "min" | "max" | "mean" | "sum" => {
                vec!["column".to_string()] // Placeholder
            }
            "mutual_information" => {
                vec!["column1".to_string(), "column2".to_string()]
            }
            _ => vec![],
        }
    }

    /// Clears the analysis cache.
    pub fn clear_cache(&mut self) {
        self.cache.clear();
    }
}

impl Default for QueryAnalyzer {
    fn default() -> Self {
        Self::new()
    }
}

// TODO: Fix tests once Completeness constraint is made public
#[cfg(test)]
mod tests {
    use super::*;
    // use crate::constraints::completeness::Completeness;

    #[test]
    fn test_analyzer_creation() {
        let analyzer = QueryAnalyzer::new();
        assert!(analyzer.cache.is_empty());
    }

    // TODO: Re-enable once Completeness is made public
    // #[tokio::test]
    // async fn test_constraint_analysis() {
    //     let mut analyzer = QueryAnalyzer::new();
    //
    //     let constraint = Arc::new(Completeness::new("test_column")) as Arc<dyn Constraint>;
    //     let constraints = vec![("test".to_string(), constraint)];
    //
    //     let analyses = analyzer.analyze(&constraints).unwrap();
    //     assert_eq!(analyses.len(), 1);
    //
    //     let analysis = &analyses[0];
    //     assert_eq!(analysis.name, "test");
    //     assert_eq!(analysis.table_name, "data");
    //     assert!(analysis.is_combinable);
    //     assert!(!analysis.has_predicates);
    // }
}