prax_query/db_optimize.rs

//! Database-specific optimizations.
//!
//! This module provides performance optimizations tailored to each database:
//! - Prepared statement caching (PostgreSQL, MySQL, MSSQL)
//! - Batch size tuning for bulk operations
//! - MongoDB pipeline aggregation
//! - Query plan hints for complex queries
//!
//! # Performance Characteristics
//!
//! | Database   | Optimization              | Typical Gain |
//! |------------|---------------------------|--------------|
//! | PostgreSQL | Prepared statement cache  | 30-50%       |
//! | MySQL      | Multi-row INSERT batching | 40-60%       |
//! | MongoDB    | Bulk write batching       | 50-70%       |
//! | MSSQL      | Table-valued parameters   | 30-40%       |
//!
//! # Example
//!
//! ```rust,ignore
//! use prax_query::db_optimize::{PreparedStatementCache, BatchConfig, QueryHints};
//! use prax_query::sql::DatabaseType;
//!
//! // Prepared statement caching
//! let cache = PreparedStatementCache::new(100);
//! let stmt = cache.get_or_create("find_user", || {
//!     "SELECT * FROM users WHERE id = $1".to_string()
//! });
//!
//! // Auto-tuned batching
//! let config = BatchConfig::auto_tune(DatabaseType::PostgreSQL, avg_row_size, row_count);
//! for batch in data.chunks(config.batch_size) {
//!     execute_batch(batch);
//! }
//!
//! // Query hints
//! let hints = QueryHints::new()
//!     .parallel(4)
//!     .index_hint("users_email_idx");
//! ```

use parking_lot::RwLock;
use smallvec::SmallVec;
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Instant;

use crate::sql::DatabaseType;

// ==============================================================================
// Prepared Statement Cache
// ==============================================================================

/// Statistics for prepared statement cache.
#[derive(Debug, Clone, Default)]
pub struct PreparedStatementStats {
    /// Number of cache hits.
    pub hits: u64,
    /// Number of cache misses.
    pub misses: u64,
    /// Number of statements currently cached.
    pub cached_count: usize,
    /// Total preparation time saved (estimated).
    pub time_saved_ms: u64,
}

impl PreparedStatementStats {
    /// Calculate hit rate as a percentage.
    pub fn hit_rate(&self) -> f64 {
        let total = self.hits + self.misses;
        if total == 0 {
            0.0
        } else {
            (self.hits as f64 / total as f64) * 100.0
        }
    }
}

/// A cached prepared statement entry.
#[derive(Debug, Clone)]
pub struct CachedStatement {
    /// The SQL statement text.
    pub sql: String,
    /// Unique statement identifier/name.
    pub name: String,
    /// Number of times this statement was used.
    pub use_count: u64,
    /// When this statement was last used.
    pub last_used: Instant,
    /// Estimated preparation time in microseconds.
    pub prep_time_us: u64,
    /// Database-specific statement handle (opaque).
    pub handle: Option<u64>,
}

/// A cache for prepared statements.
///
/// This cache stores prepared statement metadata and tracks usage patterns
/// to optimize database interactions. The actual statement handles are
/// managed by the database driver.
///
/// # Features
///
/// - LRU-style eviction when capacity is reached (the entry with the oldest
///   `last_used` timestamp is removed first)
/// - Usage statistics for monitoring
/// - Thread-safe with read-write locking
///
/// # Example
///
/// ```rust
/// use prax_query::db_optimize::PreparedStatementCache;
///
/// let cache = PreparedStatementCache::new(100);
///
/// // Register a prepared statement
/// let entry = cache.get_or_create("find_user_by_email", || {
///     "SELECT * FROM users WHERE email = $1".to_string()
/// });
///
/// // Check cache stats
/// let stats = cache.stats();
/// println!("Hit rate: {:.1}%", stats.hit_rate());
/// ```
pub struct PreparedStatementCache {
    statements: RwLock<HashMap<String, CachedStatement>>,
    capacity: usize,
    hits: AtomicU64,
    misses: AtomicU64,
    time_saved_us: AtomicU64,
    /// Average preparation time in microseconds (for estimation).
    avg_prep_time_us: u64,
}

impl PreparedStatementCache {
    /// Create a new cache with the specified capacity.
    pub fn new(capacity: usize) -> Self {
        Self {
            statements: RwLock::new(HashMap::with_capacity(capacity)),
            capacity,
            hits: AtomicU64::new(0),
            misses: AtomicU64::new(0),
            time_saved_us: AtomicU64::new(0),
            avg_prep_time_us: 500, // Default 500µs estimate
        }
    }

    /// Get or create a prepared statement entry.
    ///
    /// If the statement is cached, returns the cached entry and increments hit count.
    /// Otherwise, calls the generator function, caches the result, and returns it.
    pub fn get_or_create<F>(&self, name: &str, generator: F) -> CachedStatement
    where
        F: FnOnce() -> String,
    {
        // Try read lock first (fast path)
        {
            let cache = self.statements.read();
            if let Some(stmt) = cache.get(name) {
                self.hits.fetch_add(1, Ordering::Relaxed);
                self.time_saved_us
                    .fetch_add(stmt.prep_time_us, Ordering::Relaxed);
                return stmt.clone();
            }
        }

        // Miss - need to create and cache
        self.misses.fetch_add(1, Ordering::Relaxed);

        let sql = generator();
        let entry = CachedStatement {
            sql,
            name: name.to_string(),
            use_count: 1,
            last_used: Instant::now(),
            prep_time_us: self.avg_prep_time_us,
            handle: None,
        };

        // Upgrade to write lock
        let mut cache = self.statements.write();

        // Double-check after acquiring the write lock: another thread may have
        // inserted the entry between the two locks. (This request was already
        // counted as a miss above; the extra hit count is accepted to keep the
        // fast path simple.)
        if let Some(existing) = cache.get(name) {
            self.hits.fetch_add(1, Ordering::Relaxed);
            return existing.clone();
        }

        // Evict if at capacity (simple LRU-like: remove oldest)
        if cache.len() >= self.capacity {
            self.evict_oldest(&mut cache);
        }

        cache.insert(name.to_string(), entry.clone());
        entry
    }

    /// Check if a statement is cached.
    pub fn contains(&self, name: &str) -> bool {
        self.statements.read().contains_key(name)
    }

    /// Get cache statistics.
    pub fn stats(&self) -> PreparedStatementStats {
        let cache = self.statements.read();
        PreparedStatementStats {
            hits: self.hits.load(Ordering::Relaxed),
            misses: self.misses.load(Ordering::Relaxed),
            cached_count: cache.len(),
            time_saved_ms: self.time_saved_us.load(Ordering::Relaxed) / 1000,
        }
    }

    /// Clear the cache.
    pub fn clear(&self) {
        self.statements.write().clear();
        self.hits.store(0, Ordering::Relaxed);
        self.misses.store(0, Ordering::Relaxed);
        self.time_saved_us.store(0, Ordering::Relaxed);
    }

    /// Get the number of cached statements.
    pub fn len(&self) -> usize {
        self.statements.read().len()
    }

    /// Check if the cache is empty.
    pub fn is_empty(&self) -> bool {
        self.statements.read().is_empty()
    }

    /// Evict the oldest entry.
    fn evict_oldest(&self, cache: &mut HashMap<String, CachedStatement>) {
        if let Some(oldest_key) = cache
            .iter()
            .min_by_key(|(_, v)| v.last_used)
            .map(|(k, _)| k.clone())
        {
            cache.remove(&oldest_key);
        }
    }

    /// Update statement usage (call after executing).
    pub fn record_use(&self, name: &str) {
        if let Some(stmt) = self.statements.write().get_mut(name) {
            stmt.use_count += 1;
            stmt.last_used = Instant::now();
        }
    }

    /// Set a database-specific handle for a statement.
    pub fn set_handle(&self, name: &str, handle: u64) {
        if let Some(stmt) = self.statements.write().get_mut(name) {
            stmt.handle = Some(handle);
        }
    }
}

impl Default for PreparedStatementCache {
    fn default() -> Self {
        Self::new(256)
    }
}

/// Global prepared statement cache.
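///
/// # Example
///
/// A minimal usage sketch of the process-wide cache, using this module's own API:
///
/// ```rust
/// use prax_query::db_optimize::global_statement_cache;
///
/// let cache = global_statement_cache();
/// let stmt = cache.get_or_create("count_users", || {
///     "SELECT COUNT(*) FROM users".to_string()
/// });
/// assert_eq!(stmt.name, "count_users");
/// ```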
pub fn global_statement_cache() -> &'static PreparedStatementCache {
    use std::sync::OnceLock;
    static CACHE: OnceLock<PreparedStatementCache> = OnceLock::new();
    CACHE.get_or_init(|| PreparedStatementCache::new(512))
}

// ==============================================================================
// Batch Size Tuning
// ==============================================================================

/// Configuration for batch operations.
#[derive(Debug, Clone, Copy)]
pub struct BatchConfig {
    /// Number of rows per batch.
    pub batch_size: usize,
    /// Maximum payload size in bytes.
    pub max_payload_bytes: usize,
    /// Whether to use multi-row INSERT syntax.
    pub multi_row_insert: bool,
    /// Whether to use COPY for bulk inserts (PostgreSQL).
    pub use_copy: bool,
    /// Parallelism level for bulk operations.
    pub parallelism: usize,
}

impl BatchConfig {
    /// Default batch configuration.
    pub const fn default_config() -> Self {
        Self {
            batch_size: 1000,
            max_payload_bytes: 16 * 1024 * 1024, // 16MB
            multi_row_insert: true,
            use_copy: false,
            parallelism: 1,
        }
    }

    /// Create configuration optimized for the given database.
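    ///
    /// # Example
    ///
    /// A small sketch; the asserted values are the MySQL defaults defined below.
    ///
    /// ```rust
    /// use prax_query::db_optimize::BatchConfig;
    /// use prax_query::sql::DatabaseType;
    ///
    /// let config = BatchConfig::for_database(DatabaseType::MySQL);
    /// assert_eq!(config.batch_size, 500);
    /// assert!(!config.use_copy);
    /// ```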
    pub fn for_database(db_type: DatabaseType) -> Self {
        match db_type {
            DatabaseType::PostgreSQL => Self {
                batch_size: 1000,
                max_payload_bytes: 64 * 1024 * 1024, // 64MB
                multi_row_insert: true,
                use_copy: true, // PostgreSQL COPY is very fast
                parallelism: 4,
            },
            DatabaseType::MySQL => Self {
                batch_size: 500, // MySQL has packet size limits
                max_payload_bytes: 16 * 1024 * 1024, // 16MB (default max_allowed_packet)
                multi_row_insert: true,
                use_copy: false,
                parallelism: 2,
            },
            DatabaseType::SQLite => Self {
                batch_size: 500,
                max_payload_bytes: 1024 * 1024, // 1MB (SQLite serializes writes)
                multi_row_insert: true,
                use_copy: false,
                parallelism: 1, // writes are serialized, so parallelism doesn't help
            },
            DatabaseType::MSSQL => Self {
                batch_size: 1000,
                max_payload_bytes: 32 * 1024 * 1024, // 32MB
                multi_row_insert: true,
                use_copy: false,
                parallelism: 4,
            },
        }
    }

    /// Auto-tune batch size based on row size and count.
    ///
    /// This calculates an optimal batch size that:
    /// - Stays within the max payload size
    /// - Balances memory usage vs round-trip overhead
    /// - Adapts to row size variations
    ///
    /// # Example
    ///
    /// ```rust
    /// use prax_query::db_optimize::BatchConfig;
    /// use prax_query::sql::DatabaseType;
    ///
    /// // Auto-tune for 10,000 rows averaging 500 bytes each
    /// let config = BatchConfig::auto_tune(
    ///     DatabaseType::PostgreSQL,
    ///     500,    // avg row size in bytes
    ///     10_000, // total row count
    /// );
    /// println!("Optimal batch size: {}", config.batch_size);
    /// ```
    pub fn auto_tune(db_type: DatabaseType, avg_row_size: usize, total_rows: usize) -> Self {
        let mut config = Self::for_database(db_type);

        // Calculate batch size based on payload limit
        let max_rows_by_payload = if avg_row_size > 0 {
            config.max_payload_bytes / avg_row_size
        } else {
            config.batch_size
        };

        // Balance: smaller batches for small datasets, larger for big ones
        let optimal_batch = if total_rows < 100 {
            total_rows // No batching needed for small datasets
        } else if total_rows < 1000 {
            (total_rows / 10).max(100)
        } else {
            // For large datasets, use ~10 batches or max by payload
            let by_count = total_rows / 10;
            by_count.min(max_rows_by_payload).min(10_000).max(100)
        };

        config.batch_size = optimal_batch;

        // Adjust parallelism based on dataset size
        if total_rows < 1000 {
            config.parallelism = 1;
        } else if total_rows < 10_000 {
            config.parallelism = config.parallelism.min(2);
        }

        // Use COPY for large PostgreSQL imports
        if matches!(db_type, DatabaseType::PostgreSQL) && total_rows > 5000 {
            config.use_copy = true;
        }

        config
    }

    /// Create an iterator that yields batch ranges.
    ///
    /// # Example
    ///
    /// ```rust
    /// use prax_query::db_optimize::BatchConfig;
    ///
    /// let config = BatchConfig::default_config();
    /// let total = 2500;
    ///
    /// for (start, end) in config.batch_ranges(total) {
    ///     println!("Processing rows {} to {}", start, end);
    /// }
    /// ```
    pub fn batch_ranges(&self, total: usize) -> impl Iterator<Item = (usize, usize)> {
        let batch_size = self.batch_size;
        (0..total)
            .step_by(batch_size)
            .map(move |start| (start, (start + batch_size).min(total)))
    }

    /// Calculate the number of batches for a given total.
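    ///
    /// # Example
    ///
    /// A quick check of the ceiling division:
    ///
    /// ```rust
    /// use prax_query::db_optimize::BatchConfig;
    ///
    /// let config = BatchConfig::default_config(); // batch_size = 1000
    /// assert_eq!(config.batch_count(2500), 3);
    /// ```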
    pub fn batch_count(&self, total: usize) -> usize {
        total.div_ceil(self.batch_size)
    }
}

impl Default for BatchConfig {
    fn default() -> Self {
        Self::default_config()
    }
}

// ==============================================================================
// MongoDB Pipeline Aggregation
// ==============================================================================

/// A builder for combining multiple MongoDB operations into a single pipeline.
///
/// This reduces round-trips by batching related operations.
///
/// # Example
///
/// ```rust,ignore
/// use prax_query::db_optimize::MongoPipelineBuilder;
///
/// let pipeline = MongoPipelineBuilder::new()
///     .match_stage(doc! { "status": "active" })
///     .lookup("orders", "user_id", "_id", "user_orders")
///     .unwind("$user_orders")
///     .group("$user_id", doc! { "total": { "$sum": "$amount" } })
///     .sort(doc! { "total": -1 })
///     .limit(10)
///     .build();
/// ```
#[derive(Debug, Clone, Default)]
pub struct MongoPipelineBuilder {
    stages: Vec<PipelineStage>,
    /// Whether to allow disk use for large operations.
    pub allow_disk_use: bool,
    /// Batch size for cursor.
    pub batch_size: Option<u32>,
    /// Maximum time for operation in milliseconds.
    pub max_time_ms: Option<u64>,
    /// Comment for profiling.
    pub comment: Option<String>,
}

/// A MongoDB aggregation pipeline stage.
#[derive(Debug, Clone)]
pub enum PipelineStage {
    /// $match stage.
    Match(String),
    /// $project stage.
    Project(String),
    /// $group stage with _id and accumulators.
    Group { id: String, accumulators: String },
    /// $sort stage.
    Sort(String),
    /// $limit stage.
    Limit(u64),
    /// $skip stage.
    Skip(u64),
    /// $unwind stage.
    Unwind { path: String, preserve_null: bool },
    /// $lookup stage.
    Lookup {
        from: String,
        local_field: String,
        foreign_field: String,
        r#as: String,
    },
    /// $addFields stage.
    AddFields(String),
    /// $set stage (alias for $addFields).
    Set(String),
    /// $unset stage.
    Unset(Vec<String>),
    /// $replaceRoot stage.
    ReplaceRoot(String),
    /// $count stage.
    Count(String),
    /// $facet stage for multiple pipelines.
    Facet(Vec<(String, Vec<PipelineStage>)>),
    /// $bucket stage.
    Bucket {
        group_by: String,
        boundaries: String,
        default: Option<String>,
        output: Option<String>,
    },
    /// $sample stage.
    Sample(u64),
    /// $merge stage for output.
    Merge {
        into: String,
        on: Option<String>,
        when_matched: Option<String>,
        when_not_matched: Option<String>,
    },
    /// $out stage.
    Out(String),
    /// Raw BSON stage.
    Raw(String),
}

impl MongoPipelineBuilder {
    /// Create a new empty pipeline builder.
    pub fn new() -> Self {
        Self::default()
    }

    /// Add a $match stage.
    pub fn match_stage(mut self, filter: impl Into<String>) -> Self {
        self.stages.push(PipelineStage::Match(filter.into()));
        self
    }

    /// Add a $project stage.
    pub fn project(mut self, projection: impl Into<String>) -> Self {
        self.stages.push(PipelineStage::Project(projection.into()));
        self
    }

    /// Add a $group stage.
    pub fn group(mut self, id: impl Into<String>, accumulators: impl Into<String>) -> Self {
        self.stages.push(PipelineStage::Group {
            id: id.into(),
            accumulators: accumulators.into(),
        });
        self
    }

    /// Add a $sort stage.
    pub fn sort(mut self, sort: impl Into<String>) -> Self {
        self.stages.push(PipelineStage::Sort(sort.into()));
        self
    }

    /// Add a $limit stage.
    pub fn limit(mut self, n: u64) -> Self {
        self.stages.push(PipelineStage::Limit(n));
        self
    }

    /// Add a $skip stage.
    pub fn skip(mut self, n: u64) -> Self {
        self.stages.push(PipelineStage::Skip(n));
        self
    }

    /// Add a $unwind stage.
    pub fn unwind(mut self, path: impl Into<String>) -> Self {
        self.stages.push(PipelineStage::Unwind {
            path: path.into(),
            preserve_null: false,
        });
        self
    }

    /// Add a $unwind stage with null preservation.
    pub fn unwind_preserve_null(mut self, path: impl Into<String>) -> Self {
        self.stages.push(PipelineStage::Unwind {
            path: path.into(),
            preserve_null: true,
        });
        self
    }

    /// Add a $lookup stage.
    pub fn lookup(
        mut self,
        from: impl Into<String>,
        local_field: impl Into<String>,
        foreign_field: impl Into<String>,
        r#as: impl Into<String>,
    ) -> Self {
        self.stages.push(PipelineStage::Lookup {
            from: from.into(),
            local_field: local_field.into(),
            foreign_field: foreign_field.into(),
            r#as: r#as.into(),
        });
        self
    }

    /// Add an $addFields stage.
    pub fn add_fields(mut self, fields: impl Into<String>) -> Self {
        self.stages.push(PipelineStage::AddFields(fields.into()));
        self
    }

    /// Add a $set stage.
    pub fn set(mut self, fields: impl Into<String>) -> Self {
        self.stages.push(PipelineStage::Set(fields.into()));
        self
    }

    /// Add a $unset stage.
    pub fn unset<I, S>(mut self, fields: I) -> Self
    where
        I: IntoIterator<Item = S>,
        S: Into<String>,
    {
        self.stages.push(PipelineStage::Unset(
            fields.into_iter().map(Into::into).collect(),
        ));
        self
    }

    /// Add a $replaceRoot stage.
    pub fn replace_root(mut self, new_root: impl Into<String>) -> Self {
        self.stages
            .push(PipelineStage::ReplaceRoot(new_root.into()));
        self
    }

    /// Add a $count stage.
    pub fn count(mut self, field: impl Into<String>) -> Self {
        self.stages.push(PipelineStage::Count(field.into()));
        self
    }

    /// Add a $sample stage.
    pub fn sample(mut self, size: u64) -> Self {
        self.stages.push(PipelineStage::Sample(size));
        self
    }

    /// Add a $merge output stage.
    pub fn merge_into(mut self, collection: impl Into<String>) -> Self {
        self.stages.push(PipelineStage::Merge {
            into: collection.into(),
            on: None,
            when_matched: None,
            when_not_matched: None,
        });
        self
    }

    /// Add a $merge output stage with options.
    pub fn merge(
        mut self,
        into: impl Into<String>,
        on: Option<String>,
        when_matched: Option<String>,
        when_not_matched: Option<String>,
    ) -> Self {
        self.stages.push(PipelineStage::Merge {
            into: into.into(),
            on,
            when_matched,
            when_not_matched,
        });
        self
    }

    /// Add a $out stage.
    pub fn out(mut self, collection: impl Into<String>) -> Self {
        self.stages.push(PipelineStage::Out(collection.into()));
        self
    }

    /// Add a raw BSON stage.
    pub fn raw(mut self, stage: impl Into<String>) -> Self {
        self.stages.push(PipelineStage::Raw(stage.into()));
        self
    }

    /// Enable disk use for large operations.
    pub fn with_disk_use(mut self) -> Self {
        self.allow_disk_use = true;
        self
    }

    /// Set cursor batch size.
    pub fn with_batch_size(mut self, size: u32) -> Self {
        self.batch_size = Some(size);
        self
    }

    /// Set maximum execution time.
    pub fn with_max_time(mut self, ms: u64) -> Self {
        self.max_time_ms = Some(ms);
        self
    }

    /// Add a comment for profiling.
    pub fn with_comment(mut self, comment: impl Into<String>) -> Self {
        self.comment = Some(comment.into());
        self
    }

    /// Get the number of stages.
    pub fn stage_count(&self) -> usize {
        self.stages.len()
    }

    /// Build the pipeline as a JSON array string.
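    ///
    /// # Example
    ///
    /// A minimal sketch showing the exact JSON produced by two stages:
    ///
    /// ```rust
    /// use prax_query::db_optimize::MongoPipelineBuilder;
    ///
    /// let json = MongoPipelineBuilder::new()
    ///     .match_stage(r#"{ "status": "active" }"#)
    ///     .limit(5)
    ///     .build();
    /// assert_eq!(json, r#"[{ "$match": { "status": "active" } }, { "$limit": 5 }]"#);
    /// ```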
    pub fn build(&self) -> String {
        let stages: Vec<String> = self.stages.iter().map(|s| s.to_json()).collect();
        format!("[{}]", stages.join(", "))
    }

    /// Get the stages.
    pub fn stages(&self) -> &[PipelineStage] {
        &self.stages
    }
}

impl PipelineStage {
    /// Convert to JSON representation.
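    ///
    /// # Example
    ///
    /// ```rust
    /// use prax_query::db_optimize::PipelineStage;
    ///
    /// let stage = PipelineStage::Limit(10);
    /// assert_eq!(stage.to_json(), r#"{ "$limit": 10 }"#);
    /// ```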
    pub fn to_json(&self) -> String {
        match self {
            Self::Match(filter) => format!(r#"{{ "$match": {} }}"#, filter),
            Self::Project(proj) => format!(r#"{{ "$project": {} }}"#, proj),
            Self::Group { id, accumulators } => {
                format!(r#"{{ "$group": {{ "_id": {}, {} }} }}"#, id, accumulators)
            }
            Self::Sort(sort) => format!(r#"{{ "$sort": {} }}"#, sort),
            Self::Limit(n) => format!(r#"{{ "$limit": {} }}"#, n),
            Self::Skip(n) => format!(r#"{{ "$skip": {} }}"#, n),
            Self::Unwind { path, preserve_null } => {
                if *preserve_null {
                    format!(
                        r#"{{ "$unwind": {{ "path": "{}", "preserveNullAndEmptyArrays": true }} }}"#,
                        path
                    )
                } else {
                    format!(r#"{{ "$unwind": "{}" }}"#, path)
                }
            }
            Self::Lookup {
                from,
                local_field,
                foreign_field,
                r#as,
            } => {
                format!(
                    r#"{{ "$lookup": {{ "from": "{}", "localField": "{}", "foreignField": "{}", "as": "{}" }} }}"#,
                    from, local_field, foreign_field, r#as
                )
            }
            Self::AddFields(fields) => format!(r#"{{ "$addFields": {} }}"#, fields),
            Self::Set(fields) => format!(r#"{{ "$set": {} }}"#, fields),
            Self::Unset(fields) => {
                let quoted: Vec<_> = fields.iter().map(|f| format!(r#""{}""#, f)).collect();
                format!(r#"{{ "$unset": [{}] }}"#, quoted.join(", "))
            }
            Self::ReplaceRoot(root) => format!(r#"{{ "$replaceRoot": {{ "newRoot": {} }} }}"#, root),
            Self::Count(field) => format!(r#"{{ "$count": "{}" }}"#, field),
            Self::Facet(facets) => {
                let facet_strs: Vec<_> = facets
                    .iter()
                    .map(|(name, stages)| {
                        let pipeline: Vec<_> = stages.iter().map(|s| s.to_json()).collect();
                        format!(r#""{}": [{}]"#, name, pipeline.join(", "))
                    })
                    .collect();
                format!(r#"{{ "$facet": {{ {} }} }}"#, facet_strs.join(", "))
            }
            Self::Bucket {
                group_by,
                boundaries,
                default,
                output,
            } => {
                let mut parts = vec![
                    format!(r#""groupBy": {}"#, group_by),
                    format!(r#""boundaries": {}"#, boundaries),
                ];
                if let Some(def) = default {
                    parts.push(format!(r#""default": {}"#, def));
                }
                if let Some(out) = output {
                    parts.push(format!(r#""output": {}"#, out));
                }
                format!(r#"{{ "$bucket": {{ {} }} }}"#, parts.join(", "))
            }
            Self::Sample(size) => format!(r#"{{ "$sample": {{ "size": {} }} }}"#, size),
            Self::Merge {
                into,
                on,
                when_matched,
                when_not_matched,
            } => {
                let mut parts = vec![format!(r#""into": "{}""#, into)];
                if let Some(on_field) = on {
                    parts.push(format!(r#""on": "{}""#, on_field));
                }
                if let Some(matched) = when_matched {
                    parts.push(format!(r#""whenMatched": "{}""#, matched));
                }
                if let Some(not_matched) = when_not_matched {
                    parts.push(format!(r#""whenNotMatched": "{}""#, not_matched));
                }
                format!(r#"{{ "$merge": {{ {} }} }}"#, parts.join(", "))
            }
            Self::Out(collection) => format!(r#"{{ "$out": "{}" }}"#, collection),
            Self::Raw(stage) => stage.clone(),
        }
    }
}

// ==============================================================================
// Query Plan Hints
// ==============================================================================

/// Query plan hints for optimizing complex queries.
///
/// These hints are applied to queries to guide the query planner:
/// - Index hints to force specific index usage
/// - Parallelism settings
/// - Join strategies
/// - Materialization preferences
///
/// # Database Support
///
/// | Hint Type | PostgreSQL         | MySQL | SQLite | MSSQL |
/// |-----------|--------------------|-------|--------|-------|
/// | Index     | ⚠️ (scan GUCs only) | ✅    | ✅     | ✅    |
/// | Parallel  | ✅                 | ❌    | ❌     | ✅    |
/// | Join      | ✅                 | ✅    | ❌     | ✅    |
/// | CTE Mat   | ✅                 | ❌    | ❌     | ❌    |
///
/// # Example
///
/// ```rust
/// use prax_query::db_optimize::QueryHints;
/// use prax_query::sql::DatabaseType;
///
/// let hints = QueryHints::new()
///     .index_hint("users_email_idx")
///     .parallel(4)
///     .no_seq_scan();
///
/// let sql = hints.apply_to_query("SELECT * FROM users WHERE email = $1", DatabaseType::PostgreSQL);
/// ```
#[derive(Debug, Clone, Default)]
pub struct QueryHints {
    /// Index hints.
    pub indexes: SmallVec<[IndexHint; 4]>,
    /// Parallelism level (0 = default, >0 = specific workers).
    pub parallel_workers: Option<u32>,
    /// Join method hints.
    pub join_hints: SmallVec<[JoinHint; 4]>,
    /// Whether to prevent sequential scans.
    pub no_seq_scan: bool,
    /// Whether to prevent index scans.
    pub no_index_scan: bool,
    /// CTE materialization preference.
    pub cte_materialized: Option<bool>,
    /// Query timeout in milliseconds.
    pub timeout_ms: Option<u64>,
    /// Custom database-specific hints.
    pub custom: Vec<String>,
}

/// An index hint.
#[derive(Debug, Clone)]
pub struct IndexHint {
    /// Table the index belongs to.
    pub table: Option<String>,
    /// Index name.
    pub index_name: String,
    /// Hint type.
    pub hint_type: IndexHintType,
}

/// Type of index hint.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IndexHintType {
    /// Force use of this index.
    Use,
    /// Force ignore of this index.
    Ignore,
    /// Prefer this index if possible.
    Prefer,
}

/// A join method hint.
#[derive(Debug, Clone)]
pub struct JoinHint {
    /// Tables involved in the join.
    pub tables: Vec<String>,
    /// Join method to use.
    pub method: JoinMethod,
}

/// Join methods.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JoinMethod {
    /// Nested loop join.
    NestedLoop,
    /// Hash join.
    Hash,
    /// Merge join.
    Merge,
}

impl QueryHints {
    /// Create new empty hints.
    pub fn new() -> Self {
        Self::default()
    }

    /// Add an index hint.
    pub fn index_hint(mut self, index_name: impl Into<String>) -> Self {
        self.indexes.push(IndexHint {
            table: None,
            index_name: index_name.into(),
            hint_type: IndexHintType::Use,
        });
        self
    }

    /// Add an index hint for a specific table.
    pub fn index_hint_for_table(
        mut self,
        table: impl Into<String>,
        index_name: impl Into<String>,
    ) -> Self {
        self.indexes.push(IndexHint {
            table: Some(table.into()),
            index_name: index_name.into(),
            hint_type: IndexHintType::Use,
        });
        self
    }

    /// Ignore a specific index.
    pub fn ignore_index(mut self, index_name: impl Into<String>) -> Self {
        self.indexes.push(IndexHint {
            table: None,
            index_name: index_name.into(),
            hint_type: IndexHintType::Ignore,
        });
        self
    }

    /// Set parallelism level.
    pub fn parallel(mut self, workers: u32) -> Self {
        self.parallel_workers = Some(workers);
        self
    }

    /// Disable parallel execution.
    pub fn no_parallel(mut self) -> Self {
        self.parallel_workers = Some(0);
        self
    }

    /// Prevent sequential scans.
    pub fn no_seq_scan(mut self) -> Self {
        self.no_seq_scan = true;
        self
    }

    /// Prevent index scans.
    pub fn no_index_scan(mut self) -> Self {
        self.no_index_scan = true;
        self
    }

    /// Set CTE materialization preference.
    pub fn cte_materialized(mut self, materialized: bool) -> Self {
        self.cte_materialized = Some(materialized);
        self
    }

    /// Force nested loop join.
    pub fn nested_loop_join(mut self, tables: Vec<String>) -> Self {
        self.join_hints.push(JoinHint {
            tables,
            method: JoinMethod::NestedLoop,
        });
        self
    }

    /// Force hash join.
    pub fn hash_join(mut self, tables: Vec<String>) -> Self {
        self.join_hints.push(JoinHint {
            tables,
            method: JoinMethod::Hash,
        });
        self
    }

    /// Force merge join.
    pub fn merge_join(mut self, tables: Vec<String>) -> Self {
        self.join_hints.push(JoinHint {
            tables,
            method: JoinMethod::Merge,
        });
        self
    }

    /// Set query timeout.
    pub fn timeout(mut self, ms: u64) -> Self {
        self.timeout_ms = Some(ms);
        self
    }

    /// Add a custom database-specific hint.
    pub fn custom_hint(mut self, hint: impl Into<String>) -> Self {
        self.custom.push(hint.into());
        self
    }

    /// Generate hints as SQL prefix for the given database.
    pub fn to_sql_prefix(&self, db_type: DatabaseType) -> String {
        match db_type {
            DatabaseType::PostgreSQL => self.to_postgres_prefix(),
            DatabaseType::MySQL => self.to_mysql_prefix(),
            DatabaseType::SQLite => self.to_sqlite_prefix(),
            DatabaseType::MSSQL => self.to_mssql_prefix(),
        }
    }

    /// Generate hints as SQL suffix (for query options).
    pub fn to_sql_suffix(&self, db_type: DatabaseType) -> String {
        match db_type {
            DatabaseType::MySQL => self.to_mysql_suffix(),
            DatabaseType::MSSQL => self.to_mssql_suffix(),
            _ => String::new(),
        }
    }

    /// Apply hints to a query.
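    ///
    /// With no hints configured, the query is returned unchanged:
    ///
    /// ```rust
    /// use prax_query::db_optimize::QueryHints;
    /// use prax_query::sql::DatabaseType;
    ///
    /// let hints = QueryHints::new();
    /// assert_eq!(hints.apply_to_query("SELECT 1", DatabaseType::SQLite), "SELECT 1");
    /// ```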
    pub fn apply_to_query(&self, query: &str, db_type: DatabaseType) -> String {
        let prefix = self.to_sql_prefix(db_type);
        let suffix = self.to_sql_suffix(db_type);

        if prefix.is_empty() && suffix.is_empty() {
            return query.to_string();
        }

        let mut result = String::with_capacity(prefix.len() + query.len() + suffix.len() + 2);
        if !prefix.is_empty() {
            result.push_str(&prefix);
            result.push('\n');
        }
        result.push_str(query);
        if !suffix.is_empty() {
            result.push(' ');
            result.push_str(&suffix);
        }
        result
    }

    fn to_postgres_prefix(&self) -> String {
        let mut settings: Vec<String> = Vec::new();

        if self.no_seq_scan {
            settings.push("SET LOCAL enable_seqscan = off;".to_string());
        }
        if self.no_index_scan {
            settings.push("SET LOCAL enable_indexscan = off;".to_string());
        }
        if let Some(workers) = self.parallel_workers {
            settings.push(format!("SET LOCAL max_parallel_workers_per_gather = {};", workers));
        }
        if let Some(ms) = self.timeout_ms {
            settings.push(format!("SET LOCAL statement_timeout = {};", ms));
        }

        // Join hints: PostgreSQL has no per-join hints, so steer the planner by
        // disabling the alternative join methods for the whole statement.
        for hint in &self.join_hints {
            match hint.method {
                JoinMethod::NestedLoop => {
                    settings.push("SET LOCAL enable_hashjoin = off;".to_string());
                    settings.push("SET LOCAL enable_mergejoin = off;".to_string());
                }
                JoinMethod::Hash => {
                    settings.push("SET LOCAL enable_nestloop = off;".to_string());
                    settings.push("SET LOCAL enable_mergejoin = off;".to_string());
                }
                JoinMethod::Merge => {
                    settings.push("SET LOCAL enable_nestloop = off;".to_string());
                    settings.push("SET LOCAL enable_hashjoin = off;".to_string());
                }
            }
        }

        // Custom hints
        for hint in &self.custom {
            settings.push(hint.clone());
        }

        // Note: `cte_materialized` is not emitted here; it affects how the CTE
        // itself is rendered (`AS [NOT] MATERIALIZED`), not a session setting.
        settings.join("\n")
    }

    fn to_mysql_prefix(&self) -> String {
        // MySQL uses inline hints, not SET statements
        String::new()
    }

    fn to_mysql_suffix(&self) -> String {
        let mut hints: Vec<String> = Vec::new();

        // Index hints. In MySQL these belong immediately after the table name
        // (e.g. `FROM users USE INDEX (idx)`), so they are emitted here as
        // comments for the caller to splice into the right position.
        for hint in &self.indexes {
            let hint_type = match hint.hint_type {
                IndexHintType::Use => "USE INDEX",
                IndexHintType::Ignore => "IGNORE INDEX",
                IndexHintType::Prefer => "FORCE INDEX",
            };
            if let Some(ref table) = hint.table {
                hints.push(format!("/* {} FOR {} ({}) */", hint_type, table, hint.index_name));
            } else {
                hints.push(format!("/* {} ({}) */", hint_type, hint.index_name));
            }
        }

        // Join hints, likewise emitted as comments; native MySQL optimizer
        // hints use the /*+ ... */ form directly after the SELECT keyword.
        for hint in &self.join_hints {
            let method = match hint.method {
                JoinMethod::NestedLoop => "BNL",
                JoinMethod::Hash => "HASH_JOIN",
                JoinMethod::Merge => "MERGE",
            };
            hints.push(format!("/* {}({}) */", method, hint.tables.join(", ")));
        }

        hints.join(" ")
    }

    fn to_sqlite_prefix(&self) -> String {
        // SQLite has limited hint support
        String::new()
    }

    fn to_mssql_prefix(&self) -> String {
        // MSSQL uses inline OPTION hints
        String::new()
    }

    fn to_mssql_suffix(&self) -> String {
        let mut options: Vec<String> = Vec::new();

        // Index hints
        for hint in &self.indexes {
            match hint.hint_type {
                IndexHintType::Use => {
                    if let Some(ref table) = hint.table {
                        options.push(format!("TABLE HINT({}, INDEX({}))", table, hint.index_name));
                    }
                }
                IndexHintType::Ignore => {
                    // T-SQL has no "ignore index" hint; skip.
                }
                IndexHintType::Prefer => {
                    if let Some(ref table) = hint.table {
                        // FORCESEEK(index) alone is invalid T-SQL (column names would
                        // be required), so pair a plain FORCESEEK with an INDEX hint.
                        options.push(format!(
                            "TABLE HINT({}, INDEX({}), FORCESEEK)",
                            table, hint.index_name
                        ));
                    }
                }
            }
        }

        // Parallelism
        if let Some(workers) = self.parallel_workers {
            if workers == 0 {
                options.push("MAXDOP 1".to_string());
            } else {
                options.push(format!("MAXDOP {}", workers));
            }
        }

        // Join hints
        for hint in &self.join_hints {
            let method = match hint.method {
                JoinMethod::NestedLoop => "LOOP JOIN",
                JoinMethod::Hash => "HASH JOIN",
                JoinMethod::Merge => "MERGE JOIN",
            };
            options.push(method.to_string());
        }

        if options.is_empty() {
            String::new()
        } else {
            format!("OPTION ({})", options.join(", "))
        }
    }

    /// Check if any hints are configured.
    pub fn has_hints(&self) -> bool {
        !self.indexes.is_empty()
            || self.parallel_workers.is_some()
            || !self.join_hints.is_empty()
            || self.no_seq_scan
            || self.no_index_scan
            || self.cte_materialized.is_some()
            || self.timeout_ms.is_some()
            || !self.custom.is_empty()
    }
}

// ==============================================================================
// Tests
// ==============================================================================

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_prepared_statement_cache() {
        let cache = PreparedStatementCache::new(10);

        // First access - miss
        let stmt1 = cache.get_or_create("test", || "SELECT * FROM users".to_string());
        assert_eq!(stmt1.sql, "SELECT * FROM users");

        let stats = cache.stats();
        assert_eq!(stats.misses, 1);
        assert_eq!(stats.hits, 0);

        // Second access - hit
        let stmt2 = cache.get_or_create("test", || panic!("Should not be called"));
        assert_eq!(stmt2.sql, "SELECT * FROM users");

        let stats = cache.stats();
        assert_eq!(stats.misses, 1);
        assert_eq!(stats.hits, 1);
        assert!(stats.hit_rate() > 0.0);
    }
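
    // Exercises LRU-style eviction via the public API. Assertions avoid relying
    // on exact eviction order, which depends on `Instant` resolution.
    #[test]
    fn test_cache_eviction_at_capacity() {
        let cache = PreparedStatementCache::new(2);

        cache.get_or_create("a", || "SELECT 1".to_string());
        cache.get_or_create("b", || "SELECT 2".to_string());
        cache.get_or_create("c", || "SELECT 3".to_string());

        // Capacity is 2, so one of the earlier entries was evicted.
        assert_eq!(cache.len(), 2);
        assert!(cache.contains("c"));
    }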

    #[test]
    fn test_batch_config_auto_tune() {
        // Small dataset
        let config = BatchConfig::auto_tune(DatabaseType::PostgreSQL, 100, 50);
        assert_eq!(config.batch_size, 50); // No batching needed

        // Medium dataset
        let config = BatchConfig::auto_tune(DatabaseType::PostgreSQL, 500, 5000);
        assert!(config.batch_size >= 100);
        assert!(config.batch_size <= 5000);

        // Large dataset
        let config = BatchConfig::auto_tune(DatabaseType::PostgreSQL, 200, 100_000);
        assert!(config.use_copy); // Should use COPY for large PG imports
        assert!(config.batch_size >= 100);
    }

    #[test]
    fn test_batch_ranges() {
        let config = BatchConfig {
            batch_size: 100,
            ..Default::default()
        };

        let ranges: Vec<_> = config.batch_ranges(250).collect();
        assert_eq!(ranges.len(), 3);
        assert_eq!(ranges[0], (0, 100));
        assert_eq!(ranges[1], (100, 200));
        assert_eq!(ranges[2], (200, 250));
    }

    #[test]
    fn test_mongo_pipeline_builder() {
        let pipeline = MongoPipelineBuilder::new()
            .match_stage(r#"{ "status": "active" }"#)
            .lookup("orders", "user_id", "_id", "user_orders")
            .unwind("$user_orders")
            .group(r#""$user_id""#, r#""total": { "$sum": "$amount" }"#)
            .sort(r#"{ "total": -1 }"#)
            .limit(10)
            .build();

        assert!(pipeline.contains("$match"));
        assert!(pipeline.contains("$lookup"));
        assert!(pipeline.contains("$unwind"));
        assert!(pipeline.contains("$group"));
        assert!(pipeline.contains("$sort"));
        assert!(pipeline.contains("$limit"));
    }
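
    // Checks the alternate $unwind form emitted by `unwind_preserve_null`.
    #[test]
    fn test_unwind_preserve_null_json() {
        let pipeline = MongoPipelineBuilder::new()
            .unwind_preserve_null("$items")
            .build();

        assert!(pipeline.contains("preserveNullAndEmptyArrays"));
        assert!(pipeline.contains("$items"));
    }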

    #[test]
    fn test_query_hints_postgres() {
        let hints = QueryHints::new()
            .no_seq_scan()
            .parallel(4)
            .timeout(5000);

        let prefix = hints.to_sql_prefix(DatabaseType::PostgreSQL);
        assert!(prefix.contains("enable_seqscan = off"));
        assert!(prefix.contains("max_parallel_workers_per_gather = 4"));
        assert!(prefix.contains("statement_timeout = 5000"));
    }

    #[test]
    fn test_query_hints_mssql() {
        let hints = QueryHints::new()
            .parallel(2)
            .hash_join(vec!["users".to_string(), "orders".to_string()]);

        let suffix = hints.to_sql_suffix(DatabaseType::MSSQL);
        assert!(suffix.contains("MAXDOP 2"));
        assert!(suffix.contains("HASH JOIN"));
    }
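
    // The MySQL backend emits index hints as comments for the caller to splice in.
    #[test]
    fn test_query_hints_mysql_suffix() {
        let hints = QueryHints::new().index_hint("users_email_idx");

        let suffix = hints.to_sql_suffix(DatabaseType::MySQL);
        assert!(suffix.contains("USE INDEX"));
        assert!(suffix.contains("users_email_idx"));
    }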

    #[test]
    fn test_query_hints_apply() {
        let hints = QueryHints::new().no_seq_scan();

        let query = "SELECT * FROM users WHERE id = $1";
        let result = hints.apply_to_query(query, DatabaseType::PostgreSQL);

        assert!(result.contains("enable_seqscan = off"));
        assert!(result.contains("SELECT * FROM users"));
    }
}