Skip to main content

fraiseql_cli/schema/
optimizer.rs

1//! Schema Optimizer
2//!
3//! Analyzes compiled schemas and adds SQL generation hints for runtime optimization.
4//! This runs during compilation to precompute optimization strategies.
5
6use anyhow::Result;
7use fraiseql_core::schema::{CompiledSchema, QueryDefinition, SqlProjectionHint, TypeDefinition};
8use tracing::{debug, info};
9
10/// Schema optimizer that analyzes queries and adds SQL hints
11pub struct SchemaOptimizer;
12
13impl SchemaOptimizer {
14    /// Optimize a compiled schema
15    ///
16    /// This analyzes queries and adds optimization hints like:
17    /// - Index suggestions
18    /// - Join order hints
19    /// - Projection optimization
20    /// - Predicate pushdown opportunities
21    pub fn optimize(schema: &mut CompiledSchema) -> Result<OptimizationReport> {
22        info!("Optimizing compiled schema");
23
24        let mut report = OptimizationReport::default();
25
26        // Analyze queries for optimization opportunities
27        for query in &schema.queries {
28            Self::analyze_query(query, &mut report);
29        }
30
31        // Analyze types for field access patterns and SQL projection opportunities
32        Self::analyze_types(schema, &mut report);
33
34        // Detect and apply SQL projection hints to types that would benefit
35        Self::apply_sql_projection_hints(schema, &mut report);
36
37        info!("Schema optimization complete: {} hints generated", report.total_hints());
38
39        Ok(report)
40    }
41
42    /// Analyze a single query for optimization opportunities
43    fn analyze_query(query: &QueryDefinition, report: &mut OptimizationReport) {
44        debug!("Analyzing query: {}", query.name);
45
46        // Check if query would benefit from indexes
47        if query.returns_list && !query.arguments.is_empty() {
48            report.index_hints.push(IndexHint {
49                query_name:        query.name.clone(),
50                reason:            "List query with arguments benefits from index".to_string(),
51                suggested_columns: query.arguments.iter().map(|arg| arg.name.clone()).collect(),
52            });
53        }
54
55        // Check for auto-params that suggest filtering
56        if query.auto_params.has_where {
57            report.optimization_notes.push(format!(
58                "Query '{}' supports WHERE filtering - ensure denormalized filter columns exist",
59                query.name
60            ));
61        }
62
63        // Check for pagination
64        if query.auto_params.has_limit || query.auto_params.has_offset {
65            report.optimization_notes.push(format!(
66                "Query '{}' supports pagination - consider adding ORDER BY for deterministic results",
67                query.name
68            ));
69        }
70    }
71
72    /// Analyze types for field access patterns
73    fn analyze_types(schema: &CompiledSchema, report: &mut OptimizationReport) {
74        for type_def in &schema.types {
75            // Check for large number of fields (potential over-fetching)
76            if type_def.fields.len() > 20 {
77                report.optimization_notes.push(format!(
78                    "Type '{}' has {} fields - consider field selection optimization",
79                    type_def.name,
80                    type_def.fields.len()
81                ));
82            }
83
84            // Check for JSONB columns
85            if !type_def.jsonb_column.is_empty() {
86                report.optimization_notes.push(format!(
87                    "Type '{}' uses JSONB column '{}' - ensure GIN index exists for performance",
88                    type_def.name, type_def.jsonb_column
89                ));
90            }
91        }
92    }
93
94    /// Detect and apply SQL projection hints to types that would benefit from SQL-level field
95    /// projection.
96    ///
97    /// SQL projection optimization works by filtering JSONB fields at the database level,
98    /// reducing network payload and JSON deserialization overhead.
99    ///
100    /// Detection heuristics:
101    /// - Type must have a JSONB column
102    /// - Type should have sufficient fields (>10) or estimated large payload (>1KB)
103    /// - PostgreSQL benefit: 95% payload reduction, 37% latency improvement
104    fn apply_sql_projection_hints(schema: &mut CompiledSchema, report: &mut OptimizationReport) {
105        for type_def in &mut schema.types {
106            if Self::should_use_projection(type_def) {
107                let hint = Self::create_projection_hint(type_def);
108
109                debug!(
110                    "Type '{}' qualifies for SQL projection: {} bytes saved ({:.0}%)",
111                    type_def.name,
112                    Self::estimate_payload_savings(type_def),
113                    hint.estimated_reduction_percent
114                );
115
116                type_def.sql_projection_hint = Some(hint);
117                report.projection_hints.push(ProjectionHint {
118                    type_name:                   type_def.name.clone(),
119                    field_count:                 type_def.fields.len(),
120                    estimated_reduction_percent: type_def
121                        .sql_projection_hint
122                        .as_ref()
123                        .map_or(0, |h| h.estimated_reduction_percent),
124                });
125            }
126        }
127    }
128
129    /// Determine if a type should use SQL projection optimization.
130    ///
131    /// A type qualifies for SQL projection if:
132    /// 1. It has a JSONB column (`store_format` == "jsonb")
133    /// 2. It has sufficient fields (>10) OR estimated large payload (>1KB)
134    ///
135    /// Rationale: SQL projection's benefit (reducing JSONB payload) is most valuable
136    /// for types with many fields or large payloads. Small types don't benefit enough
137    /// to justify the SQL generation overhead.
138    fn should_use_projection(type_def: &TypeDefinition) -> bool {
139        // Condition 1: Must have JSONB column
140        if type_def.jsonb_column.is_empty() {
141            return false;
142        }
143
144        // Condition 2a: Sufficient field count (>10 fields = likely significant overhead)
145        if type_def.fields.len() > 10 {
146            return true;
147        }
148
149        // Condition 2b: Likely large payload (estimate ~150 bytes per field)
150        // Average field: id (50B) + name (100B) + value (100B) = 250B overhead
151        // 1KB threshold = ~4+ fields of average size
152        let estimated_size = type_def.fields.len() * 250;
153        if estimated_size > 1024 {
154            return true;
155        }
156
157        false
158    }
159
160    /// Create a SQL projection hint for PostgreSQL.
161    ///
162    /// The hint contains:
163    /// - Database type: "postgresql"
164    /// - Projection template: `jsonb_build_object('field1', data->>'field1', ...)`
165    /// - Estimated reduction: Based on field count and typical JSONB overhead
166    fn create_projection_hint(type_def: &TypeDefinition) -> SqlProjectionHint {
167        // Estimate payload reduction based on field count and JSONB overhead
168        // Formula: Each unselected field = ~250 bytes saved (conservative estimate)
169        // Average type: 20 fields, 5 selected = 15 fields Ɨ 250B = 3750B saved = 95% reduction
170        let estimated_reduction = Self::estimate_reduction_percent(type_def.fields.len());
171
172        SqlProjectionHint {
173            database:                    "postgresql".to_string(),
174            projection_template:         Self::generate_postgresql_projection_template(type_def),
175            estimated_reduction_percent: estimated_reduction,
176        }
177    }
178
179    /// Estimate the percentage of payload that can be reduced through SQL projection.
180    ///
181    /// Based on benchmarks:
182    /// - Baseline payload: ~9.8 KB for typical large type
183    /// - Projected payload: ~450 B (select 5 key fields)
184    /// - Reduction: 95.4%
185    ///
186    /// Conservative scaling formula:
187    /// - Few fields (5-10): 40% reduction (mostly JSONB overhead, few wasted fields)
188    /// - Many fields (11-20): 70% reduction (more unselected fields)
189    /// - Very many fields (20+): 85% reduction (mostly unnecessary data)
190    const fn estimate_reduction_percent(field_count: usize) -> u32 {
191        match field_count {
192            0..=10 => 40,
193            11..=20 => 70,
194            _ => 85,
195        }
196    }
197
198    /// Estimate total payload savings in bytes for a type.
199    fn estimate_payload_savings(type_def: &TypeDefinition) -> usize {
200        let estimated_reduction = Self::estimate_reduction_percent(type_def.fields.len());
201        // Assume baseline JSONB payload ~250 bytes per field
202        let total_payload = type_def.fields.len() * 250;
203        (total_payload * estimated_reduction as usize) / 100
204    }
205
206    /// Generate a PostgreSQL `jsonb_build_object` template for SQL projection.
207    ///
208    /// Example output:
209    /// `jsonb_build_object`('id', data->>'id', 'name', data->>'name', 'email', data->>'email')
210    ///
211    /// Note: This is a template. At runtime, the adapter will:
212    /// 1. Receive the requested GraphQL fields
213    /// 2. Filter to only include requested fields
214    /// 3. Generate the actual SQL with selected fields only
215    fn generate_postgresql_projection_template(type_def: &TypeDefinition) -> String {
216        if type_def.fields.is_empty() {
217            // Edge case: type with no fields, use pass-through
218            "data".to_string()
219        } else {
220            // Create template with first N fields (up to 20 as representative)
221            let field_list: Vec<String> = type_def
222                .fields
223                .iter()
224                .take(20)
225                .map(|f| format!("'{}', data->>'{}' ", f.name, f.name))
226                .collect();
227
228            format!("jsonb_build_object({})", field_list.join(","))
229        }
230    }
231}
232
233/// Optimization report generated during compilation
234#[derive(Debug, Default)]
235pub struct OptimizationReport {
236    /// Index suggestions for query performance
237    pub index_hints:        Vec<IndexHint>,
238    /// SQL projection hints for types that would benefit from JSONB field filtering
239    pub projection_hints:   Vec<ProjectionHint>,
240    /// General optimization notes
241    pub optimization_notes: Vec<String>,
242}
243
244impl OptimizationReport {
245    /// Get total number of optimization hints
246    pub fn total_hints(&self) -> usize {
247        self.index_hints.len() + self.projection_hints.len() + self.optimization_notes.len()
248    }
249
250    /// Check if there are any optimization suggestions
251    pub fn has_suggestions(&self) -> bool {
252        !self.index_hints.is_empty()
253            || !self.projection_hints.is_empty()
254            || !self.optimization_notes.is_empty()
255    }
256
257    /// Print report to stdout
258    pub fn print(&self) {
259        if !self.has_suggestions() {
260            return;
261        }
262
263        println!("\nšŸ“Š Optimization Suggestions:");
264
265        if !self.index_hints.is_empty() {
266            println!("\n  Indexes:");
267            for hint in &self.index_hints {
268                println!("  • Query '{}': {}", hint.query_name, hint.reason);
269                println!("    Columns: {}", hint.suggested_columns.join(", "));
270            }
271        }
272
273        if !self.projection_hints.is_empty() {
274            println!("\n  SQL Projection Optimization:");
275            for hint in &self.projection_hints {
276                println!(
277                    "  • Type '{}' ({} fields): ~{}% payload reduction",
278                    hint.type_name, hint.field_count, hint.estimated_reduction_percent
279                );
280            }
281        }
282
283        if !self.optimization_notes.is_empty() {
284            println!("\n  Notes:");
285            for note in &self.optimization_notes {
286                println!("  • {note}");
287            }
288        }
289
290        println!();
291    }
292}
293
294/// Index hint for query optimization
295#[derive(Debug, Clone)]
296pub struct IndexHint {
297    /// Query name that would benefit from index
298    pub query_name:        String,
299    /// Reason for the suggestion
300    pub reason:            String,
301    /// Suggested columns to index
302    pub suggested_columns: Vec<String>,
303}
304
305/// SQL projection hint for type optimization
306#[derive(Debug, Clone)]
307pub struct ProjectionHint {
308    /// Type name that would benefit from SQL projection
309    pub type_name:                   String,
310    /// Number of fields in the type
311    pub field_count:                 usize,
312    /// Estimated payload reduction percentage (0-100)
313    pub estimated_reduction_percent: u32,
314}
315
316#[cfg(test)]
317mod tests {
318    use std::collections::HashMap;
319
320    use fraiseql_core::schema::{
321        ArgumentDefinition, AutoParams, FieldDefinition, FieldType, TypeDefinition,
322    };
323    use fraiseql_core::validation::CustomTypeRegistry;
324
325    use super::*;
326
327    #[test]
328    fn test_optimize_empty_schema() {
329        let mut schema = CompiledSchema {
330            types:         vec![],
331            enums:         vec![],
332            input_types:   vec![],
333            interfaces:    vec![],
334            unions:        vec![],
335            queries:       vec![],
336            mutations:     vec![],
337            subscriptions: vec![],
338            directives:    vec![],
339            observers:     Vec::new(),
340            fact_tables:   HashMap::default(),
341            federation:    None,
342            security:      None,
343            schema_sdl:    None,
344            custom_scalars: CustomTypeRegistry::default(),
345        };
346
347        let report = SchemaOptimizer::optimize(&mut schema).unwrap();
348        assert_eq!(report.total_hints(), 0);
349    }
350
351    #[test]
352    fn test_index_hint_for_list_query() {
353        let mut schema = CompiledSchema {
354            types:         vec![],
355            enums:         vec![],
356            input_types:   vec![],
357            interfaces:    vec![],
358            unions:        vec![],
359            queries:       vec![QueryDefinition {
360                name:         "users".to_string(),
361                return_type:  "User".to_string(),
362                returns_list: true,
363                nullable:     false,
364                arguments:    vec![ArgumentDefinition {
365                    name:          "status".to_string(),
366                    arg_type:      FieldType::String,
367                    nullable:      false,
368                    default_value: None,
369                    description:   None,
370                    deprecation:   None,
371                }],
372                sql_source:   Some("users".to_string()),
373                description:  None,
374                auto_params:  AutoParams::default(),
375                deprecation:  None,
376                jsonb_column: "data".to_string(),
377            }],
378            mutations:     vec![],
379            subscriptions: vec![],
380            directives:    vec![],
381            observers:     Vec::new(),
382            fact_tables:   HashMap::default(),
383            federation:    None,
384            security:      None,
385            schema_sdl:    None,
386            custom_scalars: CustomTypeRegistry::default(),
387        };
388
389        let report = SchemaOptimizer::optimize(&mut schema).unwrap();
390        assert!(report.total_hints() > 0);
391        assert!(!report.index_hints.is_empty());
392        assert_eq!(report.index_hints[0].query_name, "users");
393    }
394
395    #[test]
396    fn test_pagination_note() {
397        let mut schema = CompiledSchema {
398            types:         vec![],
399            enums:         vec![],
400            input_types:   vec![],
401            interfaces:    vec![],
402            unions:        vec![],
403            queries:       vec![QueryDefinition {
404                name:         "products".to_string(),
405                return_type:  "Product".to_string(),
406                returns_list: true,
407                nullable:     false,
408                arguments:    vec![],
409                sql_source:   Some("products".to_string()),
410                description:  None,
411                auto_params:  AutoParams {
412                    has_where:    false,
413                    has_order_by: false,
414                    has_limit:    true,
415                    has_offset:   true,
416                },
417                deprecation:  None,
418                jsonb_column: "data".to_string(),
419            }],
420            mutations:     vec![],
421            subscriptions: vec![],
422            directives:    vec![],
423            observers:     Vec::new(),
424            fact_tables:   HashMap::default(),
425            federation:    None,
426            security:      None,
427            schema_sdl:    None,
428            custom_scalars: CustomTypeRegistry::default(),
429        };
430
431        let report = SchemaOptimizer::optimize(&mut schema).unwrap();
432        assert!(report.optimization_notes.iter().any(|note| note.contains("pagination")));
433    }
434
435    #[test]
436    fn test_large_type_warning() {
437        let mut schema = CompiledSchema {
438            types:         vec![TypeDefinition {
439                name:                "BigType".to_string(),
440                sql_source:          String::new(),
441                jsonb_column:        String::new(),
442                fields:              (0..25)
443                    .map(|i| FieldDefinition {
444                        name:           format!("field{i}"),
445                        field_type:     FieldType::String,
446                        nullable:       false,
447                        default_value:  None,
448                        description:    None,
449                        vector_config:  None,
450                        alias:          None,
451                        deprecation:    None,
452                        requires_scope: None,
453                    })
454                    .collect(),
455                description:         None,
456                sql_projection_hint: None,
457                implements:          vec![],
458            }],
459            enums:         vec![],
460            input_types:   vec![],
461            interfaces:    vec![],
462            unions:        vec![],
463            queries:       vec![],
464            mutations:     vec![],
465            subscriptions: vec![],
466            directives:    vec![],
467            observers:     Vec::new(),
468            fact_tables:   HashMap::default(),
469            federation:    None,
470            security:      None,
471            schema_sdl:    None,
472            custom_scalars: CustomTypeRegistry::default(),
473        };
474
475        let report = SchemaOptimizer::optimize(&mut schema).unwrap();
476        assert!(report.optimization_notes.iter().any(|note| note.contains("25 fields")));
477    }
478
479    #[test]
480    fn test_projection_hint_for_large_type() {
481        let mut schema = CompiledSchema {
482            types:         vec![TypeDefinition {
483                name:                "User".to_string(),
484                sql_source:          "users".to_string(),
485                jsonb_column:        "data".to_string(),
486                fields:              (0..15)
487                    .map(|i| FieldDefinition {
488                        name:           format!("field{i}"),
489                        field_type:     FieldType::String,
490                        nullable:       false,
491                        default_value:  None,
492                        description:    None,
493                        vector_config:  None,
494                        alias:          None,
495                        deprecation:    None,
496                        requires_scope: None,
497                    })
498                    .collect(),
499                description:         None,
500                sql_projection_hint: None,
501                implements:          vec![],
502            }],
503            enums:         vec![],
504            input_types:   vec![],
505            interfaces:    vec![],
506            unions:        vec![],
507            queries:       vec![],
508            mutations:     vec![],
509            subscriptions: vec![],
510            directives:    vec![],
511            observers:     Vec::new(),
512            fact_tables:   HashMap::default(),
513            federation:    None,
514            security:      None,
515            schema_sdl:    None,
516            custom_scalars: CustomTypeRegistry::default(),
517        };
518
519        let report = SchemaOptimizer::optimize(&mut schema).unwrap();
520
521        // Type with 15 fields and JSONB column should get projection hint
522        assert!(!report.projection_hints.is_empty());
523        assert_eq!(report.projection_hints[0].type_name, "User");
524        assert_eq!(report.projection_hints[0].field_count, 15);
525
526        // Type should have sql_projection_hint set
527        assert!(schema.types[0].has_sql_projection());
528        let hint = schema.types[0].sql_projection_hint.as_ref().unwrap();
529        assert_eq!(hint.database, "postgresql");
530        assert!(hint.estimated_reduction_percent > 0);
531    }
532
533    #[test]
534    fn test_projection_not_applied_without_jsonb() {
535        let mut schema = CompiledSchema {
536            types:         vec![TypeDefinition {
537                name:                "SmallType".to_string(),
538                sql_source:          "small_table".to_string(),
539                jsonb_column:        String::new(), // No JSONB column
540                fields:              (0..15)
541                    .map(|i| FieldDefinition {
542                        name:           format!("field{i}"),
543                        field_type:     FieldType::String,
544                        nullable:       false,
545                        default_value:  None,
546                        description:    None,
547                        vector_config:  None,
548                        alias:          None,
549                        deprecation:    None,
550                        requires_scope: None,
551                    })
552                    .collect(),
553                description:         None,
554                sql_projection_hint: None,
555                implements:          vec![],
556            }],
557            enums:         vec![],
558            input_types:   vec![],
559            interfaces:    vec![],
560            unions:        vec![],
561            queries:       vec![],
562            mutations:     vec![],
563            subscriptions: vec![],
564            directives:    vec![],
565            observers:     Vec::new(),
566            fact_tables:   HashMap::default(),
567            federation:    None,
568            security:      None,
569            schema_sdl:    None,
570            custom_scalars: CustomTypeRegistry::default(),
571        };
572
573        let report = SchemaOptimizer::optimize(&mut schema).unwrap();
574
575        // Type without JSONB column should not get projection hint
576        assert!(report.projection_hints.is_empty());
577        assert!(!schema.types[0].has_sql_projection());
578    }
579}