fraiseql-cli 2.3.2

CLI tools for FraiseQL v2 - Schema compilation and development utilities
Documentation
//! Schema Optimizer
//!
//! Analyzes compiled schemas and adds SQL generation hints for runtime optimization.
//! This runs during compilation to precompute optimization strategies.

use anyhow::Result;
use fraiseql_core::{
    db::types::DatabaseType,
    schema::{CompiledSchema, QueryDefinition, SqlProjectionHint, TypeDefinition},
};
use tracing::{debug, info};

/// Schema optimizer that analyzes queries and adds SQL hints
pub struct SchemaOptimizer;

impl SchemaOptimizer {
    /// Optimize a compiled schema
    ///
    /// This analyzes queries and adds optimization hints like:
    /// - Index suggestions
    /// - Join order hints
    /// - Projection optimization
    /// - Predicate pushdown opportunities
    ///
    /// # Errors
    ///
    /// Currently infallible; returns `Ok` in all cases. The `Result` return
    /// type is reserved for future optimizations that may produce errors.
    pub fn optimize(schema: &mut CompiledSchema) -> Result<OptimizationReport> {
        info!("Optimizing compiled schema");

        let mut report = OptimizationReport::default();

        // Analyze queries for optimization opportunities
        for query in &schema.queries {
            Self::analyze_query(query, &mut report);
        }

        // Analyze types for field access patterns and SQL projection opportunities
        Self::analyze_types(schema, &mut report);

        // Detect and apply SQL projection hints to types that would benefit
        Self::apply_sql_projection_hints(schema, &mut report);

        info!("Schema optimization complete: {} hints generated", report.total_hints());

        Ok(report)
    }

    /// Analyze a single query for optimization opportunities
    fn analyze_query(query: &QueryDefinition, report: &mut OptimizationReport) {
        debug!("Analyzing query: {}", query.name);

        // Check if query would benefit from indexes
        if query.returns_list && !query.arguments.is_empty() {
            report.index_hints.push(IndexHint {
                query_name:        query.name.clone(),
                reason:            "List query with arguments benefits from index".to_string(),
                suggested_columns: query.arguments.iter().map(|arg| arg.name.clone()).collect(),
            });
        }

        // Check for auto-params that suggest filtering
        if query.auto_params.has_where {
            report.optimization_notes.push(format!(
                "Query '{}' supports WHERE filtering - ensure denormalized filter columns exist",
                query.name
            ));
        }

        // Check for pagination
        if query.auto_params.has_limit || query.auto_params.has_offset {
            report.optimization_notes.push(format!(
                "Query '{}' supports pagination - consider adding ORDER BY for deterministic results",
                query.name
            ));
        }
    }

    /// Analyze types for field access patterns
    fn analyze_types(schema: &CompiledSchema, report: &mut OptimizationReport) {
        for type_def in &schema.types {
            // Check for large number of fields (potential over-fetching)
            if type_def.fields.len() > 20 {
                report.optimization_notes.push(format!(
                    "Type '{}' has {} fields - consider field selection optimization",
                    type_def.name,
                    type_def.fields.len()
                ));
            }

            // Check for JSONB columns
            if !type_def.jsonb_column.is_empty() {
                report.optimization_notes.push(format!(
                    "Type '{}' uses JSONB column '{}' - ensure GIN index exists for performance",
                    type_def.name, type_def.jsonb_column
                ));
            }
        }
    }

    /// Detect and apply SQL projection hints to types that would benefit from SQL-level field
    /// projection.
    ///
    /// SQL projection optimization works by filtering JSONB fields at the database level,
    /// reducing network payload and JSON deserialization overhead.
    ///
    /// Detection heuristics:
    /// - Type must have a JSONB column
    /// - Type should have sufficient fields (>10) or estimated large payload (>1KB)
    /// - PostgreSQL benefit: reduced payload and latency (proportional to fields omitted; run
    ///   `cargo bench --bench sql_projection_benchmark` for hardware-specific numbers)
    fn apply_sql_projection_hints(schema: &mut CompiledSchema, report: &mut OptimizationReport) {
        for type_def in &mut schema.types {
            if Self::should_use_projection(type_def) {
                let hint = Self::create_projection_hint(type_def);

                debug!(
                    "Type '{}' qualifies for SQL projection: {} bytes saved ({:.0}%)",
                    type_def.name,
                    Self::estimate_payload_savings(type_def),
                    hint.estimated_reduction_percent
                );

                type_def.sql_projection_hint = Some(hint);
                report.projection_hints.push(ProjectionHint {
                    type_name:                   type_def.name.to_string(),
                    field_count:                 type_def.fields.len(),
                    estimated_reduction_percent: type_def
                        .sql_projection_hint
                        .as_ref()
                        .map_or(0, |h| h.estimated_reduction_percent),
                });
            }
        }
    }

    /// Determine if a type should use SQL projection optimization.
    ///
    /// A type qualifies for SQL projection if:
    /// 1. It has a JSONB column (`store_format` == "jsonb")
    /// 2. It has sufficient fields (>10) OR estimated large payload (>1KB)
    ///
    /// Rationale: SQL projection's benefit (reducing JSONB payload) is most valuable
    /// for types with many fields or large payloads. Small types don't benefit enough
    /// to justify the SQL generation overhead.
    fn should_use_projection(type_def: &TypeDefinition) -> bool {
        // Condition 1: Must have JSONB column
        if type_def.jsonb_column.is_empty() {
            return false;
        }

        // Condition 2a: Sufficient field count (>10 fields = likely significant overhead)
        if type_def.fields.len() > 10 {
            return true;
        }

        // Condition 2b: Likely large payload (estimate ~150 bytes per field)
        // Average field: id (50B) + name (100B) + value (100B) = 250B overhead
        // 1KB threshold = ~4+ fields of average size
        let estimated_size = type_def.fields.len() * 250;
        if estimated_size > 1024 {
            return true;
        }

        false
    }

    /// Create a SQL projection hint for PostgreSQL.
    ///
    /// The hint contains:
    /// - Database type: "postgresql"
    /// - Projection template: `jsonb_build_object('field1', data->>'field1', ...)`
    /// - Estimated reduction: Based on field count and typical JSONB overhead
    fn create_projection_hint(type_def: &TypeDefinition) -> SqlProjectionHint {
        // Rough estimate: each unselected field ≈ 250 bytes saved (conservative).
        // Actual reduction varies by schema and query pattern; not a guaranteed figure.
        let estimated_reduction = Self::estimate_reduction_percent(type_def.fields.len());

        SqlProjectionHint::new(
            DatabaseType::PostgreSQL,
            Self::generate_postgresql_projection_template(type_def),
            estimated_reduction,
        )
    }

    /// Estimate the percentage of payload that can be reduced through SQL projection.
    ///
    /// Based on benchmarks:
    /// - Baseline payload: ~9.8 KB for typical large type
    /// - Projected payload: ~450 B (select 5 key fields)
    /// - Reduction: 95.4%
    ///
    /// Conservative scaling formula:
    /// - Few fields (5-10): 40% reduction (mostly JSONB overhead, few wasted fields)
    /// - Many fields (11-20): 70% reduction (more unselected fields)
    /// - Very many fields (20+): 85% reduction (mostly unnecessary data)
    const fn estimate_reduction_percent(field_count: usize) -> u32 {
        match field_count {
            0..=10 => 40,
            11..=20 => 70,
            _ => 85,
        }
    }

    /// Estimate total payload savings in bytes for a type.
    fn estimate_payload_savings(type_def: &TypeDefinition) -> usize {
        let estimated_reduction = Self::estimate_reduction_percent(type_def.fields.len());
        // Assume baseline JSONB payload ~250 bytes per field
        let total_payload = type_def.fields.len() * 250;
        (total_payload * estimated_reduction as usize) / 100
    }

    /// Generate a PostgreSQL `jsonb_build_object` template for SQL projection.
    ///
    /// Example output:
    /// `jsonb_build_object`('id', data->>'id', 'name', data->>'name', 'email', data->>'email')
    ///
    /// Note: This is a template. At runtime, the adapter will:
    /// 1. Receive the requested GraphQL fields
    /// 2. Filter to only include requested fields
    /// 3. Generate the actual SQL with selected fields only
    fn generate_postgresql_projection_template(type_def: &TypeDefinition) -> String {
        if type_def.fields.is_empty() {
            // Edge case: type with no fields, use pass-through
            "data".to_string()
        } else {
            // Create template with first N fields (up to 20 as representative)
            let field_list: Vec<String> = type_def
                .fields
                .iter()
                .take(20)
                .map(|f| format!("'{}', data->>'{}' ", f.name, f.name))
                .collect();

            format!("jsonb_build_object({})", field_list.join(","))
        }
    }
}

/// Optimization report generated during compilation
#[derive(Debug, Default)]
pub struct OptimizationReport {
    /// Index suggestions for query performance
    pub index_hints:        Vec<IndexHint>,
    /// SQL projection hints for types that would benefit from JSONB field filtering
    pub projection_hints:   Vec<ProjectionHint>,
    /// General optimization notes
    pub optimization_notes: Vec<String>,
}

impl OptimizationReport {
    /// Get total number of optimization hints
    pub fn total_hints(&self) -> usize {
        self.index_hints.len() + self.projection_hints.len() + self.optimization_notes.len()
    }

    /// Check if there are any optimization suggestions
    pub fn has_suggestions(&self) -> bool {
        !self.index_hints.is_empty()
            || !self.projection_hints.is_empty()
            || !self.optimization_notes.is_empty()
    }

    /// Print report to stdout
    pub fn print(&self) {
        if !self.has_suggestions() {
            return;
        }

        println!("\nOptimization Suggestions:");

        if !self.index_hints.is_empty() {
            println!("\n  Indexes:");
            for hint in &self.index_hints {
                println!("  • Query '{}': {}", hint.query_name, hint.reason);
                println!("    Columns: {}", hint.suggested_columns.join(", "));
            }
        }

        if !self.projection_hints.is_empty() {
            println!("\n  SQL Projection Optimization:");
            for hint in &self.projection_hints {
                println!(
                    "  • Type '{}' ({} fields): ~{}% payload reduction",
                    hint.type_name, hint.field_count, hint.estimated_reduction_percent
                );
            }
        }

        if !self.optimization_notes.is_empty() {
            println!("\n  Notes:");
            for note in &self.optimization_notes {
                println!("{note}");
            }
        }

        println!();
    }
}

/// Index hint for query optimization
#[derive(Debug, Clone)]
pub struct IndexHint {
    /// Query name that would benefit from index
    pub query_name:        String,
    /// Reason for the suggestion
    pub reason:            String,
    /// Suggested columns to index
    pub suggested_columns: Vec<String>,
}

/// SQL projection hint for type optimization
#[derive(Debug, Clone)]
pub struct ProjectionHint {
    /// Type name that would benefit from SQL projection
    pub type_name:                   String,
    /// Number of fields in the type
    pub field_count:                 usize,
    /// Estimated payload reduction percentage (0-100)
    pub estimated_reduction_percent: u32,
}