prax_query/
db_optimize.rs

1//! Database-specific optimizations.
2//!
3//! This module provides performance optimizations tailored to each database:
4//! - Prepared statement caching (PostgreSQL, MySQL, MSSQL)
5//! - Batch size tuning for bulk operations
6//! - MongoDB pipeline aggregation
7//! - Query plan hints for complex queries
8//!
9//! # Performance Characteristics
10//!
11//! | Database   | Optimization              | Typical Gain |
12//! |------------|---------------------------|--------------|
13//! | PostgreSQL | Prepared statement cache  | 30-50%       |
14//! | MySQL      | Multi-row INSERT batching | 40-60%       |
15//! | MongoDB    | Bulk write batching       | 50-70%       |
16//! | MSSQL      | Table-valued parameters   | 30-40%       |
17//!
18//! # Example
19//!
20//! ```rust,ignore
//! use prax_query::db_optimize::{PreparedStatementCache, BatchConfig, QueryHints};
//! use prax_query::sql::DatabaseType;
//!
//! // Prepared statement caching
//! let cache = PreparedStatementCache::new(100);
//! let stmt = cache.get_or_create("find_user", || {
//!     "SELECT * FROM users WHERE id = $1".to_string()
//! });
//!
//! // Auto-tuned batching
//! let config = BatchConfig::auto_tune(DatabaseType::PostgreSQL, avg_row_size, row_count);
31//! for batch in data.chunks(config.batch_size) {
32//!     execute_batch(batch);
33//! }
34//!
35//! // Query hints
36//! let hints = QueryHints::new()
37//!     .parallel(4)
38//!     .index_hint("users_email_idx");
39//! ```
40
41use parking_lot::RwLock;
42use smallvec::SmallVec;
43use std::collections::HashMap;
44use std::sync::atomic::{AtomicU64, Ordering};
45use std::time::Instant;
46
47use crate::sql::DatabaseType;
48
49// ==============================================================================
50// Prepared Statement Cache
51// ==============================================================================
52
/// Statistics for prepared statement cache.
#[derive(Debug, Clone, Default)]
pub struct PreparedStatementStats {
    /// Number of cache hits.
    pub hits: u64,
    /// Number of cache misses.
    pub misses: u64,
    /// Number of statements currently cached.
    pub cached_count: usize,
    /// Total preparation time saved (estimated), in milliseconds.
    pub time_saved_ms: u64,
}

impl PreparedStatementStats {
    /// Calculate hit rate as a percentage.
    ///
    /// Returns `0.0` when neither a hit nor a miss has been recorded, so the
    /// result is never `NaN`.
    pub fn hit_rate(&self) -> f64 {
        if self.hits == 0 && self.misses == 0 {
            return 0.0;
        }

        // Sum in floating point: `hits + misses` as `u64` can overflow (and
        // panics in debug builds) when the counters are very large.
        let hits = self.hits as f64;
        let total = hits + self.misses as f64;
        (hits / total) * 100.0
    }
}
77
/// Metadata kept for one prepared statement held in the cache.
#[derive(Debug, Clone)]
pub struct CachedStatement {
    /// SQL text of the statement.
    pub sql: String,
    /// Name (unique identifier) under which the statement is registered.
    pub name: String,
    /// How many times the statement has been used.
    pub use_count: u64,
    /// Moment of the most recent use.
    pub last_used: Instant,
    /// Estimated cost of preparing the statement, in microseconds.
    pub prep_time_us: u64,
    /// Opaque, database-specific statement handle, if one has been assigned.
    pub handle: Option<u64>,
}
94
95/// A cache for prepared statements.
96///
97/// This cache stores prepared statement metadata and tracks usage patterns
98/// to optimize database interactions. The actual statement handles are
99/// managed by the database driver.
100///
101/// # Features
102///
103/// - LRU eviction when capacity is reached
104/// - Usage statistics for monitoring
105/// - Thread-safe with read-write locking
106/// - Automatic cleanup of stale entries
107///
108/// # Example
109///
110/// ```rust
111/// use prax_query::db_optimize::PreparedStatementCache;
112///
113/// let cache = PreparedStatementCache::new(100);
114///
115/// // Register a prepared statement
116/// let entry = cache.get_or_create("find_user_by_email", || {
117///     "SELECT * FROM users WHERE email = $1".to_string()
118/// });
119///
120/// // Check cache stats
121/// let stats = cache.stats();
122/// println!("Hit rate: {:.1}%", stats.hit_rate());
123/// ```
124pub struct PreparedStatementCache {
125    statements: RwLock<HashMap<String, CachedStatement>>,
126    capacity: usize,
127    hits: AtomicU64,
128    misses: AtomicU64,
129    time_saved_us: AtomicU64,
130    /// Average preparation time in microseconds (for estimation).
131    avg_prep_time_us: u64,
132}
133
134impl PreparedStatementCache {
135    /// Create a new cache with the specified capacity.
136    pub fn new(capacity: usize) -> Self {
137        Self {
138            statements: RwLock::new(HashMap::with_capacity(capacity)),
139            capacity,
140            hits: AtomicU64::new(0),
141            misses: AtomicU64::new(0),
142            time_saved_us: AtomicU64::new(0),
143            avg_prep_time_us: 500, // Default 500µs estimate
144        }
145    }
146
147    /// Get or create a prepared statement entry.
148    ///
149    /// If the statement is cached, returns the cached entry and increments hit count.
150    /// Otherwise, calls the generator function, caches the result, and returns it.
151    pub fn get_or_create<F>(&self, name: &str, generator: F) -> CachedStatement
152    where
153        F: FnOnce() -> String,
154    {
155        // Try read lock first (fast path)
156        {
157            let cache = self.statements.read();
158            if let Some(stmt) = cache.get(name) {
159                self.hits.fetch_add(1, Ordering::Relaxed);
160                self.time_saved_us
161                    .fetch_add(stmt.prep_time_us, Ordering::Relaxed);
162                return stmt.clone();
163            }
164        }
165
166        // Miss - need to create and cache
167        self.misses.fetch_add(1, Ordering::Relaxed);
168
169        let sql = generator();
170        let entry = CachedStatement {
171            sql,
172            name: name.to_string(),
173            use_count: 1,
174            last_used: Instant::now(),
175            prep_time_us: self.avg_prep_time_us,
176            handle: None,
177        };
178
179        // Upgrade to write lock
180        let mut cache = self.statements.write();
181
182        // Double-check after acquiring write lock
183        if let Some(existing) = cache.get(name) {
184            self.hits.fetch_add(1, Ordering::Relaxed);
185            return existing.clone();
186        }
187
188        // Evict if at capacity (simple LRU-like: remove oldest)
189        if cache.len() >= self.capacity {
190            self.evict_oldest(&mut cache);
191        }
192
193        cache.insert(name.to_string(), entry.clone());
194        entry
195    }
196
197    /// Check if a statement is cached.
198    pub fn contains(&self, name: &str) -> bool {
199        self.statements.read().contains_key(name)
200    }
201
202    /// Get cache statistics.
203    pub fn stats(&self) -> PreparedStatementStats {
204        let cache = self.statements.read();
205        PreparedStatementStats {
206            hits: self.hits.load(Ordering::Relaxed),
207            misses: self.misses.load(Ordering::Relaxed),
208            cached_count: cache.len(),
209            time_saved_ms: self.time_saved_us.load(Ordering::Relaxed) / 1000,
210        }
211    }
212
213    /// Clear the cache.
214    pub fn clear(&self) {
215        self.statements.write().clear();
216        self.hits.store(0, Ordering::Relaxed);
217        self.misses.store(0, Ordering::Relaxed);
218        self.time_saved_us.store(0, Ordering::Relaxed);
219    }
220
221    /// Get the number of cached statements.
222    pub fn len(&self) -> usize {
223        self.statements.read().len()
224    }
225
226    /// Check if the cache is empty.
227    pub fn is_empty(&self) -> bool {
228        self.statements.read().is_empty()
229    }
230
231    /// Evict the oldest entry.
232    fn evict_oldest(&self, cache: &mut HashMap<String, CachedStatement>) {
233        if let Some((oldest_key, _)) = cache
234            .iter()
235            .min_by_key(|(_, v)| v.last_used)
236            .map(|(k, v)| (k.clone(), v.clone()))
237        {
238            cache.remove(&oldest_key);
239        }
240    }
241
242    /// Update statement usage (call after executing).
243    pub fn record_use(&self, name: &str) {
244        if let Some(stmt) = self.statements.write().get_mut(name) {
245            stmt.use_count += 1;
246            stmt.last_used = Instant::now();
247        }
248    }
249
250    /// Set a database-specific handle for a statement.
251    pub fn set_handle(&self, name: &str, handle: u64) {
252        if let Some(stmt) = self.statements.write().get_mut(name) {
253            stmt.handle = Some(handle);
254        }
255    }
256}
257
258impl Default for PreparedStatementCache {
259    fn default() -> Self {
260        Self::new(256)
261    }
262}
263
264/// Global prepared statement cache.
265pub fn global_statement_cache() -> &'static PreparedStatementCache {
266    use std::sync::OnceLock;
267    static CACHE: OnceLock<PreparedStatementCache> = OnceLock::new();
268    CACHE.get_or_init(|| PreparedStatementCache::new(512))
269}
270
271// ==============================================================================
272// Batch Size Tuning
273// ==============================================================================
274
275/// Configuration for batch operations.
276#[derive(Debug, Clone, Copy)]
277pub struct BatchConfig {
278    /// Number of rows per batch.
279    pub batch_size: usize,
280    /// Maximum payload size in bytes.
281    pub max_payload_bytes: usize,
282    /// Whether to use multi-row INSERT syntax.
283    pub multi_row_insert: bool,
284    /// Whether to use COPY for bulk inserts (PostgreSQL).
285    pub use_copy: bool,
286    /// Parallelism level for bulk operations.
287    pub parallelism: usize,
288}
289
290impl BatchConfig {
291    /// Default batch configuration.
292    pub const fn default_config() -> Self {
293        Self {
294            batch_size: 1000,
295            max_payload_bytes: 16 * 1024 * 1024, // 16MB
296            multi_row_insert: true,
297            use_copy: false,
298            parallelism: 1,
299        }
300    }
301
302    /// Create configuration optimized for the given database.
303    pub fn for_database(db_type: DatabaseType) -> Self {
304        match db_type {
305            DatabaseType::PostgreSQL => Self {
306                batch_size: 1000,
307                max_payload_bytes: 64 * 1024 * 1024, // 64MB
308                multi_row_insert: true,
309                use_copy: true, // PostgreSQL COPY is very fast
310                parallelism: 4,
311            },
312            DatabaseType::MySQL => Self {
313                batch_size: 500,                     // MySQL has packet size limits
314                max_payload_bytes: 16 * 1024 * 1024, // 16MB (default max_allowed_packet)
315                multi_row_insert: true,
316                use_copy: false,
317                parallelism: 2,
318            },
319            DatabaseType::SQLite => Self {
320                batch_size: 500,
321                max_payload_bytes: 1024 * 1024, // 1MB (SQLite is single-threaded)
322                multi_row_insert: true,
323                use_copy: false,
324                parallelism: 1, // SQLite doesn't benefit from parallelism
325            },
326            DatabaseType::MSSQL => Self {
327                batch_size: 1000,
328                max_payload_bytes: 32 * 1024 * 1024, // 32MB
329                multi_row_insert: true,
330                use_copy: false,
331                parallelism: 4,
332            },
333        }
334    }
335
336    /// Auto-tune batch size based on row size and count.
337    ///
338    /// This calculates an optimal batch size that:
339    /// - Stays within the max payload size
340    /// - Balances memory usage vs round-trip overhead
341    /// - Adapts to row size variations
342    ///
343    /// # Example
344    ///
345    /// ```rust
346    /// use prax_query::db_optimize::BatchConfig;
347    /// use prax_query::sql::DatabaseType;
348    ///
349    /// // Auto-tune for 10,000 rows averaging 500 bytes each
350    /// let config = BatchConfig::auto_tune(
351    ///     DatabaseType::PostgreSQL,
352    ///     500,    // avg row size in bytes
353    ///     10_000, // total row count
354    /// );
355    /// println!("Optimal batch size: {}", config.batch_size);
356    /// ```
357    pub fn auto_tune(db_type: DatabaseType, avg_row_size: usize, total_rows: usize) -> Self {
358        let mut config = Self::for_database(db_type);
359
360        // Calculate batch size based on payload limit
361        let max_rows_by_payload = config
362            .max_payload_bytes
363            .checked_div(avg_row_size)
364            .unwrap_or(config.batch_size);
365
366        // Balance: smaller batches for small datasets, larger for big ones
367        let optimal_batch = if total_rows < 100 {
368            total_rows // No batching needed for small datasets
369        } else if total_rows < 1000 {
370            (total_rows / 10).max(100)
371        } else {
372            // For large datasets, use ~10 batches or max by payload
373            let by_count = total_rows / 10;
374            by_count.min(max_rows_by_payload).clamp(100, 10_000)
375        };
376
377        config.batch_size = optimal_batch;
378
379        // Adjust parallelism based on dataset size
380        if total_rows < 1000 {
381            config.parallelism = 1;
382        } else if total_rows < 10_000 {
383            config.parallelism = config.parallelism.min(2);
384        }
385
386        // Use COPY for large PostgreSQL imports
387        if matches!(db_type, DatabaseType::PostgreSQL) && total_rows > 5000 {
388            config.use_copy = true;
389        }
390
391        config
392    }
393
394    /// Create an iterator that yields batch ranges.
395    ///
396    /// # Example
397    ///
398    /// ```rust
399    /// use prax_query::db_optimize::BatchConfig;
400    ///
401    /// let config = BatchConfig::default_config();
402    /// let total = 2500;
403    ///
404    /// for (start, end) in config.batch_ranges(total) {
405    ///     println!("Processing rows {} to {}", start, end);
406    /// }
407    /// ```
408    pub fn batch_ranges(&self, total: usize) -> impl Iterator<Item = (usize, usize)> {
409        let batch_size = self.batch_size;
410        (0..total)
411            .step_by(batch_size)
412            .map(move |start| (start, (start + batch_size).min(total)))
413    }
414
415    /// Calculate the number of batches for a given total.
416    pub fn batch_count(&self, total: usize) -> usize {
417        total.div_ceil(self.batch_size)
418    }
419}
420
421impl Default for BatchConfig {
422    fn default() -> Self {
423        Self::default_config()
424    }
425}
426
427// ==============================================================================
428// MongoDB Pipeline Aggregation
429// ==============================================================================
430
431/// A builder for combining multiple MongoDB operations into a single pipeline.
432///
433/// This reduces round-trips by batching related operations.
434///
435/// # Example
436///
437/// ```rust,ignore
438/// use prax_query::db_optimize::MongoPipelineBuilder;
439///
440/// let pipeline = MongoPipelineBuilder::new()
441///     .match_stage(doc! { "status": "active" })
442///     .lookup("orders", "user_id", "_id", "user_orders")
443///     .unwind("$user_orders")
444///     .group("$user_id", doc! { "total": { "$sum": "$amount" } })
445///     .sort(doc! { "total": -1 })
446///     .limit(10)
447///     .build();
448/// ```
449#[derive(Debug, Clone, Default)]
450pub struct MongoPipelineBuilder {
451    stages: Vec<PipelineStage>,
452    /// Whether to allow disk use for large operations.
453    pub allow_disk_use: bool,
454    /// Batch size for cursor.
455    pub batch_size: Option<u32>,
456    /// Maximum time for operation in milliseconds.
457    pub max_time_ms: Option<u64>,
458    /// Comment for profiling.
459    pub comment: Option<String>,
460}
461
/// One stage of a MongoDB aggregation pipeline.
///
/// `String` payloads described as "JSON text" are emitted verbatim when the
/// stage is rendered; the remaining strings are names or paths that end up
/// inside JSON string literals.
#[derive(Debug, Clone)]
pub enum PipelineStage {
    /// `$match` stage; holds the filter as JSON text.
    Match(String),
    /// `$project` stage; holds the projection as JSON text.
    Project(String),
    /// `$group` stage with `_id` and accumulators.
    Group {
        /// JSON text used as the `_id` expression.
        id: String,
        /// JSON text of the accumulator fields, placed after `_id`.
        accumulators: String,
    },
    /// `$sort` stage; holds the sort specification as JSON text.
    Sort(String),
    /// `$limit` stage.
    Limit(u64),
    /// `$skip` stage.
    Skip(u64),
    /// `$unwind` stage.
    Unwind {
        /// Field path to unwind.
        path: String,
        /// Emit the `preserveNullAndEmptyArrays: true` form when set.
        preserve_null: bool,
    },
    /// `$lookup` stage.
    Lookup {
        /// Collection to join with.
        from: String,
        /// Field from the input documents.
        local_field: String,
        /// Field from the documents of the `from` collection.
        foreign_field: String,
        /// Name of the output array field.
        r#as: String,
    },
    /// `$addFields` stage; holds the fields as JSON text.
    AddFields(String),
    /// `$set` stage (alias for `$addFields`); holds the fields as JSON text.
    Set(String),
    /// `$unset` stage; holds the field names to remove.
    Unset(Vec<String>),
    /// `$replaceRoot` stage; holds the `newRoot` expression as JSON text.
    ReplaceRoot(String),
    /// `$count` stage; holds the name of the output field.
    Count(String),
    /// `$facet` stage: named sub-pipelines.
    Facet(Vec<(String, Vec<PipelineStage>)>),
    /// `$bucket` stage.
    Bucket {
        /// JSON text of the `groupBy` expression.
        group_by: String,
        /// JSON text of the `boundaries` array.
        boundaries: String,
        /// JSON text of the optional `default` bucket.
        default: Option<String>,
        /// JSON text of the optional `output` document.
        output: Option<String>,
    },
    /// `$sample` stage; holds the sample size.
    Sample(u64),
    /// `$merge` stage for output.
    Merge {
        /// Target collection.
        into: String,
        /// Optional `on` field name.
        on: Option<String>,
        /// Optional `whenMatched` action.
        when_matched: Option<String>,
        /// Optional `whenNotMatched` action.
        when_not_matched: Option<String>,
    },
    /// `$out` stage; holds the target collection.
    Out(String),
    /// Raw stage, emitted exactly as given.
    Raw(String),
}
519
520impl MongoPipelineBuilder {
521    /// Create a new empty pipeline builder.
522    pub fn new() -> Self {
523        Self::default()
524    }
525
526    /// Add a $match stage.
527    pub fn match_stage(mut self, filter: impl Into<String>) -> Self {
528        self.stages.push(PipelineStage::Match(filter.into()));
529        self
530    }
531
532    /// Add a $project stage.
533    pub fn project(mut self, projection: impl Into<String>) -> Self {
534        self.stages.push(PipelineStage::Project(projection.into()));
535        self
536    }
537
538    /// Add a $group stage.
539    pub fn group(mut self, id: impl Into<String>, accumulators: impl Into<String>) -> Self {
540        self.stages.push(PipelineStage::Group {
541            id: id.into(),
542            accumulators: accumulators.into(),
543        });
544        self
545    }
546
547    /// Add a $sort stage.
548    pub fn sort(mut self, sort: impl Into<String>) -> Self {
549        self.stages.push(PipelineStage::Sort(sort.into()));
550        self
551    }
552
553    /// Add a $limit stage.
554    pub fn limit(mut self, n: u64) -> Self {
555        self.stages.push(PipelineStage::Limit(n));
556        self
557    }
558
559    /// Add a $skip stage.
560    pub fn skip(mut self, n: u64) -> Self {
561        self.stages.push(PipelineStage::Skip(n));
562        self
563    }
564
565    /// Add a $unwind stage.
566    pub fn unwind(mut self, path: impl Into<String>) -> Self {
567        self.stages.push(PipelineStage::Unwind {
568            path: path.into(),
569            preserve_null: false,
570        });
571        self
572    }
573
574    /// Add a $unwind stage with null preservation.
575    pub fn unwind_preserve_null(mut self, path: impl Into<String>) -> Self {
576        self.stages.push(PipelineStage::Unwind {
577            path: path.into(),
578            preserve_null: true,
579        });
580        self
581    }
582
583    /// Add a $lookup stage.
584    pub fn lookup(
585        mut self,
586        from: impl Into<String>,
587        local_field: impl Into<String>,
588        foreign_field: impl Into<String>,
589        r#as: impl Into<String>,
590    ) -> Self {
591        self.stages.push(PipelineStage::Lookup {
592            from: from.into(),
593            local_field: local_field.into(),
594            foreign_field: foreign_field.into(),
595            r#as: r#as.into(),
596        });
597        self
598    }
599
600    /// Add a $addFields stage.
601    pub fn add_fields(mut self, fields: impl Into<String>) -> Self {
602        self.stages.push(PipelineStage::AddFields(fields.into()));
603        self
604    }
605
606    /// Add a $set stage.
607    pub fn set(mut self, fields: impl Into<String>) -> Self {
608        self.stages.push(PipelineStage::Set(fields.into()));
609        self
610    }
611
612    /// Add a $unset stage.
613    pub fn unset<I, S>(mut self, fields: I) -> Self
614    where
615        I: IntoIterator<Item = S>,
616        S: Into<String>,
617    {
618        self.stages.push(PipelineStage::Unset(
619            fields.into_iter().map(Into::into).collect(),
620        ));
621        self
622    }
623
624    /// Add a $replaceRoot stage.
625    pub fn replace_root(mut self, new_root: impl Into<String>) -> Self {
626        self.stages
627            .push(PipelineStage::ReplaceRoot(new_root.into()));
628        self
629    }
630
631    /// Add a $count stage.
632    pub fn count(mut self, field: impl Into<String>) -> Self {
633        self.stages.push(PipelineStage::Count(field.into()));
634        self
635    }
636
637    /// Add a $sample stage.
638    pub fn sample(mut self, size: u64) -> Self {
639        self.stages.push(PipelineStage::Sample(size));
640        self
641    }
642
643    /// Add a $merge output stage.
644    pub fn merge_into(mut self, collection: impl Into<String>) -> Self {
645        self.stages.push(PipelineStage::Merge {
646            into: collection.into(),
647            on: None,
648            when_matched: None,
649            when_not_matched: None,
650        });
651        self
652    }
653
654    /// Add a $merge output stage with options.
655    pub fn merge(
656        mut self,
657        into: impl Into<String>,
658        on: Option<String>,
659        when_matched: Option<String>,
660        when_not_matched: Option<String>,
661    ) -> Self {
662        self.stages.push(PipelineStage::Merge {
663            into: into.into(),
664            on,
665            when_matched,
666            when_not_matched,
667        });
668        self
669    }
670
671    /// Add a $out stage.
672    pub fn out(mut self, collection: impl Into<String>) -> Self {
673        self.stages.push(PipelineStage::Out(collection.into()));
674        self
675    }
676
677    /// Add a raw BSON stage.
678    pub fn raw(mut self, stage: impl Into<String>) -> Self {
679        self.stages.push(PipelineStage::Raw(stage.into()));
680        self
681    }
682
683    /// Enable disk use for large operations.
684    pub fn with_disk_use(mut self) -> Self {
685        self.allow_disk_use = true;
686        self
687    }
688
689    /// Set cursor batch size.
690    pub fn with_batch_size(mut self, size: u32) -> Self {
691        self.batch_size = Some(size);
692        self
693    }
694
695    /// Set maximum execution time.
696    pub fn with_max_time(mut self, ms: u64) -> Self {
697        self.max_time_ms = Some(ms);
698        self
699    }
700
701    /// Add a comment for profiling.
702    pub fn with_comment(mut self, comment: impl Into<String>) -> Self {
703        self.comment = Some(comment.into());
704        self
705    }
706
707    /// Get the number of stages.
708    pub fn stage_count(&self) -> usize {
709        self.stages.len()
710    }
711
712    /// Build the pipeline as a JSON array string.
713    pub fn build(&self) -> String {
714        let stages: Vec<String> = self.stages.iter().map(|s| s.to_json()).collect();
715        format!("[{}]", stages.join(", "))
716    }
717
718    /// Get the stages.
719    pub fn stages(&self) -> &[PipelineStage] {
720        &self.stages
721    }
722}
723
724impl PipelineStage {
725    /// Convert to JSON representation.
726    pub fn to_json(&self) -> String {
727        match self {
728            Self::Match(filter) => format!(r#"{{ "$match": {} }}"#, filter),
729            Self::Project(proj) => format!(r#"{{ "$project": {} }}"#, proj),
730            Self::Group { id, accumulators } => {
731                format!(r#"{{ "$group": {{ "_id": {}, {} }} }}"#, id, accumulators)
732            }
733            Self::Sort(sort) => format!(r#"{{ "$sort": {} }}"#, sort),
734            Self::Limit(n) => format!(r#"{{ "$limit": {} }}"#, n),
735            Self::Skip(n) => format!(r#"{{ "$skip": {} }}"#, n),
736            Self::Unwind {
737                path,
738                preserve_null,
739            } => {
740                if *preserve_null {
741                    format!(
742                        r#"{{ "$unwind": {{ "path": "{}", "preserveNullAndEmptyArrays": true }} }}"#,
743                        path
744                    )
745                } else {
746                    format!(r#"{{ "$unwind": "{}" }}"#, path)
747                }
748            }
749            Self::Lookup {
750                from,
751                local_field,
752                foreign_field,
753                r#as,
754            } => {
755                format!(
756                    r#"{{ "$lookup": {{ "from": "{}", "localField": "{}", "foreignField": "{}", "as": "{}" }} }}"#,
757                    from, local_field, foreign_field, r#as
758                )
759            }
760            Self::AddFields(fields) => format!(r#"{{ "$addFields": {} }}"#, fields),
761            Self::Set(fields) => format!(r#"{{ "$set": {} }}"#, fields),
762            Self::Unset(fields) => {
763                let quoted: Vec<_> = fields.iter().map(|f| format!(r#""{}""#, f)).collect();
764                format!(r#"{{ "$unset": [{}] }}"#, quoted.join(", "))
765            }
766            Self::ReplaceRoot(root) => {
767                format!(r#"{{ "$replaceRoot": {{ "newRoot": {} }} }}"#, root)
768            }
769            Self::Count(field) => format!(r#"{{ "$count": "{}" }}"#, field),
770            Self::Facet(facets) => {
771                let facet_strs: Vec<_> = facets
772                    .iter()
773                    .map(|(name, stages)| {
774                        let pipeline: Vec<_> = stages.iter().map(|s| s.to_json()).collect();
775                        format!(r#""{}": [{}]"#, name, pipeline.join(", "))
776                    })
777                    .collect();
778                format!(r#"{{ "$facet": {{ {} }} }}"#, facet_strs.join(", "))
779            }
780            Self::Bucket {
781                group_by,
782                boundaries,
783                default,
784                output,
785            } => {
786                let mut parts = vec![
787                    format!(r#""groupBy": {}"#, group_by),
788                    format!(r#""boundaries": {}"#, boundaries),
789                ];
790                if let Some(def) = default {
791                    parts.push(format!(r#""default": {}"#, def));
792                }
793                if let Some(out) = output {
794                    parts.push(format!(r#""output": {}"#, out));
795                }
796                format!(r#"{{ "$bucket": {{ {} }} }}"#, parts.join(", "))
797            }
798            Self::Sample(size) => format!(r#"{{ "$sample": {{ "size": {} }} }}"#, size),
799            Self::Merge {
800                into,
801                on,
802                when_matched,
803                when_not_matched,
804            } => {
805                let mut parts = vec![format!(r#""into": "{}""#, into)];
806                if let Some(on_field) = on {
807                    parts.push(format!(r#""on": "{}""#, on_field));
808                }
809                if let Some(matched) = when_matched {
810                    parts.push(format!(r#""whenMatched": "{}""#, matched));
811                }
812                if let Some(not_matched) = when_not_matched {
813                    parts.push(format!(r#""whenNotMatched": "{}""#, not_matched));
814                }
815                format!(r#"{{ "$merge": {{ {} }} }}"#, parts.join(", "))
816            }
817            Self::Out(collection) => format!(r#"{{ "$out": "{}" }}"#, collection),
818            Self::Raw(stage) => stage.clone(),
819        }
820    }
821}
822
823// ==============================================================================
824// Query Plan Hints
825// ==============================================================================
826
827/// Query plan hints for optimizing complex queries.
828///
829/// These hints are applied to queries to guide the query planner:
830/// - Index hints to force specific index usage
831/// - Parallelism settings
832/// - Join strategies
833/// - Materialization preferences
834///
835/// # Database Support
836///
837/// | Hint Type | PostgreSQL | MySQL | SQLite | MSSQL |
838/// |-----------|------------|-------|--------|-------|
839/// | Index     | ✅ (GUC)   | ✅    | ✅     | ✅    |
840/// | Parallel  | ✅         | ❌    | ❌     | ✅    |
841/// | Join      | ✅         | ✅    | ❌     | ✅    |
842/// | CTE Mat   | ✅         | ❌    | ❌     | ❌    |
843///
844/// # Example
845///
846/// ```rust
847/// use prax_query::db_optimize::QueryHints;
848/// use prax_query::sql::DatabaseType;
849///
850/// let hints = QueryHints::new()
851///     .index_hint("users_email_idx")
852///     .parallel(4)
853///     .no_seq_scan();
854///
855/// let sql = hints.apply_to_query("SELECT * FROM users WHERE email = $1", DatabaseType::PostgreSQL);
856/// ```
857#[derive(Debug, Clone, Default)]
858pub struct QueryHints {
859    /// Index hints.
860    pub indexes: SmallVec<[IndexHint; 4]>,
861    /// Parallelism level (0 = default, >0 = specific workers).
862    pub parallel_workers: Option<u32>,
863    /// Join method hints.
864    pub join_hints: SmallVec<[JoinHint; 4]>,
865    /// Whether to prevent sequential scans.
866    pub no_seq_scan: bool,
867    /// Whether to prevent index scans.
868    pub no_index_scan: bool,
869    /// CTE materialization preference.
870    pub cte_materialized: Option<bool>,
871    /// Query timeout in milliseconds.
872    pub timeout_ms: Option<u64>,
873    /// Custom database-specific hints.
874    pub custom: Vec<String>,
875}
876
877/// An index hint.
878#[derive(Debug, Clone)]
879pub struct IndexHint {
880    /// Table the index belongs to.
881    pub table: Option<String>,
882    /// Index name.
883    pub index_name: String,
884    /// Hint type.
885    pub hint_type: IndexHintType,
886}
887
/// How an [`IndexHint`] asks the planner to treat its index.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IndexHintType {
    /// Force the index to be used.
    Use,
    /// Force the index to be ignored.
    Ignore,
    /// Prefer the index when possible.
    Prefer,
}
898
899/// A join method hint.
900#[derive(Debug, Clone)]
901pub struct JoinHint {
902    /// Tables involved in the join.
903    pub tables: Vec<String>,
904    /// Join method to use.
905    pub method: JoinMethod,
906}
907
/// Join strategies that a [`JoinHint`] can request.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JoinMethod {
    /// Nested loop join.
    NestedLoop,
    /// Hash join.
    Hash,
    /// Merge join.
    Merge,
}
918
919impl QueryHints {
920    /// Create new empty hints.
921    pub fn new() -> Self {
922        Self::default()
923    }
924
925    /// Add an index hint.
926    pub fn index_hint(mut self, index_name: impl Into<String>) -> Self {
927        self.indexes.push(IndexHint {
928            table: None,
929            index_name: index_name.into(),
930            hint_type: IndexHintType::Use,
931        });
932        self
933    }
934
935    /// Add an index hint for a specific table.
936    pub fn index_hint_for_table(
937        mut self,
938        table: impl Into<String>,
939        index_name: impl Into<String>,
940    ) -> Self {
941        self.indexes.push(IndexHint {
942            table: Some(table.into()),
943            index_name: index_name.into(),
944            hint_type: IndexHintType::Use,
945        });
946        self
947    }
948
949    /// Ignore a specific index.
950    pub fn ignore_index(mut self, index_name: impl Into<String>) -> Self {
951        self.indexes.push(IndexHint {
952            table: None,
953            index_name: index_name.into(),
954            hint_type: IndexHintType::Ignore,
955        });
956        self
957    }
958
959    /// Set parallelism level.
960    pub fn parallel(mut self, workers: u32) -> Self {
961        self.parallel_workers = Some(workers);
962        self
963    }
964
965    /// Disable parallel execution.
966    pub fn no_parallel(mut self) -> Self {
967        self.parallel_workers = Some(0);
968        self
969    }
970
971    /// Prevent sequential scans.
972    pub fn no_seq_scan(mut self) -> Self {
973        self.no_seq_scan = true;
974        self
975    }
976
977    /// Prevent index scans.
978    pub fn no_index_scan(mut self) -> Self {
979        self.no_index_scan = true;
980        self
981    }
982
983    /// Set CTE materialization preference.
984    pub fn cte_materialized(mut self, materialized: bool) -> Self {
985        self.cte_materialized = Some(materialized);
986        self
987    }
988
989    /// Force nested loop join.
990    pub fn nested_loop_join(mut self, tables: Vec<String>) -> Self {
991        self.join_hints.push(JoinHint {
992            tables,
993            method: JoinMethod::NestedLoop,
994        });
995        self
996    }
997
998    /// Force hash join.
999    pub fn hash_join(mut self, tables: Vec<String>) -> Self {
1000        self.join_hints.push(JoinHint {
1001            tables,
1002            method: JoinMethod::Hash,
1003        });
1004        self
1005    }
1006
1007    /// Force merge join.
1008    pub fn merge_join(mut self, tables: Vec<String>) -> Self {
1009        self.join_hints.push(JoinHint {
1010            tables,
1011            method: JoinMethod::Merge,
1012        });
1013        self
1014    }
1015
1016    /// Set query timeout.
1017    pub fn timeout(mut self, ms: u64) -> Self {
1018        self.timeout_ms = Some(ms);
1019        self
1020    }
1021
1022    /// Add a custom database-specific hint.
1023    pub fn custom_hint(mut self, hint: impl Into<String>) -> Self {
1024        self.custom.push(hint.into());
1025        self
1026    }
1027
1028    /// Generate hints as SQL prefix for the given database.
1029    pub fn to_sql_prefix(&self, db_type: DatabaseType) -> String {
1030        match db_type {
1031            DatabaseType::PostgreSQL => self.to_postgres_prefix(),
1032            DatabaseType::MySQL => self.to_mysql_prefix(),
1033            DatabaseType::SQLite => self.to_sqlite_prefix(),
1034            DatabaseType::MSSQL => self.to_mssql_prefix(),
1035        }
1036    }
1037
1038    /// Generate hints as SQL suffix (for query options).
1039    pub fn to_sql_suffix(&self, db_type: DatabaseType) -> String {
1040        match db_type {
1041            DatabaseType::MySQL => self.to_mysql_suffix(),
1042            DatabaseType::MSSQL => self.to_mssql_suffix(),
1043            _ => String::new(),
1044        }
1045    }
1046
1047    /// Apply hints to a query.
1048    pub fn apply_to_query(&self, query: &str, db_type: DatabaseType) -> String {
1049        let prefix = self.to_sql_prefix(db_type);
1050        let suffix = self.to_sql_suffix(db_type);
1051
1052        if prefix.is_empty() && suffix.is_empty() {
1053            return query.to_string();
1054        }
1055
1056        let mut result = String::with_capacity(prefix.len() + query.len() + suffix.len() + 2);
1057        if !prefix.is_empty() {
1058            result.push_str(&prefix);
1059            result.push('\n');
1060        }
1061        result.push_str(query);
1062        if !suffix.is_empty() {
1063            result.push(' ');
1064            result.push_str(&suffix);
1065        }
1066        result
1067    }
1068
1069    fn to_postgres_prefix(&self) -> String {
1070        let mut settings: Vec<String> = Vec::new();
1071
1072        if self.no_seq_scan {
1073            settings.push("SET LOCAL enable_seqscan = off;".to_string());
1074        }
1075        if self.no_index_scan {
1076            settings.push("SET LOCAL enable_indexscan = off;".to_string());
1077        }
1078        if let Some(workers) = self.parallel_workers {
1079            settings.push(format!(
1080                "SET LOCAL max_parallel_workers_per_gather = {};",
1081                workers
1082            ));
1083        }
1084        if let Some(ms) = self.timeout_ms {
1085            settings.push(format!("SET LOCAL statement_timeout = {};", ms));
1086        }
1087
1088        // Join hints
1089        for hint in &self.join_hints {
1090            match hint.method {
1091                JoinMethod::NestedLoop => {
1092                    settings.push("SET LOCAL enable_hashjoin = off;".to_string());
1093                    settings.push("SET LOCAL enable_mergejoin = off;".to_string());
1094                }
1095                JoinMethod::Hash => {
1096                    settings.push("SET LOCAL enable_nestloop = off;".to_string());
1097                    settings.push("SET LOCAL enable_mergejoin = off;".to_string());
1098                }
1099                JoinMethod::Merge => {
1100                    settings.push("SET LOCAL enable_nestloop = off;".to_string());
1101                    settings.push("SET LOCAL enable_hashjoin = off;".to_string());
1102                }
1103            }
1104        }
1105
1106        // Custom hints
1107        for hint in &self.custom {
1108            settings.push(hint.clone());
1109        }
1110
1111        settings.join("\n")
1112    }
1113
1114    fn to_mysql_prefix(&self) -> String {
1115        // MySQL uses inline hints, not SET statements
1116        String::new()
1117    }
1118
1119    fn to_mysql_suffix(&self) -> String {
1120        let mut hints: Vec<String> = Vec::new();
1121
1122        // Index hints (applied after table name in actual query, but we return as hint comment)
1123        for hint in &self.indexes {
1124            let hint_type = match hint.hint_type {
1125                IndexHintType::Use => "USE INDEX",
1126                IndexHintType::Ignore => "IGNORE INDEX",
1127                IndexHintType::Prefer => "FORCE INDEX",
1128            };
1129            if let Some(ref table) = hint.table {
1130                hints.push(format!(
1131                    "/* {} FOR {} ({}) */",
1132                    hint_type, table, hint.index_name
1133                ));
1134            } else {
1135                hints.push(format!("/* {} ({}) */", hint_type, hint.index_name));
1136            }
1137        }
1138
1139        // Join hints
1140        for hint in &self.join_hints {
1141            let method = match hint.method {
1142                JoinMethod::NestedLoop => "BNL",
1143                JoinMethod::Hash => "HASH_JOIN",
1144                JoinMethod::Merge => "MERGE",
1145            };
1146            hints.push(format!("/* {}({}) */", method, hint.tables.join(", ")));
1147        }
1148
1149        hints.join(" ")
1150    }
1151
1152    fn to_sqlite_prefix(&self) -> String {
1153        // SQLite has limited hint support
1154        String::new()
1155    }
1156
1157    fn to_mssql_prefix(&self) -> String {
1158        // MSSQL uses inline OPTION hints
1159        String::new()
1160    }
1161
1162    fn to_mssql_suffix(&self) -> String {
1163        let mut options: Vec<String> = Vec::new();
1164
1165        // Index hints
1166        for hint in &self.indexes {
1167            match hint.hint_type {
1168                IndexHintType::Use => {
1169                    if let Some(ref table) = hint.table {
1170                        options.push(format!("TABLE HINT({}, INDEX({}))", table, hint.index_name));
1171                    }
1172                }
1173                IndexHintType::Ignore => {
1174                    // MSSQL doesn't have ignore index, skip
1175                }
1176                IndexHintType::Prefer => {
1177                    if let Some(ref table) = hint.table {
1178                        options.push(format!(
1179                            "TABLE HINT({}, FORCESEEK({}))",
1180                            table, hint.index_name
1181                        ));
1182                    }
1183                }
1184            }
1185        }
1186
1187        // Parallelism
1188        if let Some(workers) = self.parallel_workers {
1189            if workers == 0 {
1190                options.push("MAXDOP 1".to_string());
1191            } else {
1192                options.push(format!("MAXDOP {}", workers));
1193            }
1194        }
1195
1196        // Join hints
1197        for hint in &self.join_hints {
1198            let method = match hint.method {
1199                JoinMethod::NestedLoop => "LOOP JOIN",
1200                JoinMethod::Hash => "HASH JOIN",
1201                JoinMethod::Merge => "MERGE JOIN",
1202            };
1203            options.push(method.to_string());
1204        }
1205
1206        if options.is_empty() {
1207            String::new()
1208        } else {
1209            format!("OPTION ({})", options.join(", "))
1210        }
1211    }
1212
1213    /// Check if any hints are configured.
1214    pub fn has_hints(&self) -> bool {
1215        !self.indexes.is_empty()
1216            || self.parallel_workers.is_some()
1217            || !self.join_hints.is_empty()
1218            || self.no_seq_scan
1219            || self.no_index_scan
1220            || self.cte_materialized.is_some()
1221            || self.timeout_ms.is_some()
1222            || !self.custom.is_empty()
1223    }
1224}
1225
1226// ==============================================================================
1227// Tests
1228// ==============================================================================
1229
#[cfg(test)]
mod tests {
    use super::*;

    /// A second lookup under the same key is answered from the cache.
    #[test]
    fn test_prepared_statement_cache() {
        let cache = PreparedStatementCache::new(10);

        // Cold lookup: the factory closure runs and a miss is recorded.
        let first = cache.get_or_create("test", || String::from("SELECT * FROM users"));
        assert_eq!(first.sql, "SELECT * FROM users");

        let after_miss = cache.stats();
        assert_eq!(after_miss.misses, 1);
        assert_eq!(after_miss.hits, 0);

        // Warm lookup: the factory closure must not be invoked again.
        let second = cache.get_or_create("test", || panic!("Should not be called"));
        assert_eq!(second.sql, "SELECT * FROM users");

        let after_hit = cache.stats();
        assert_eq!(after_hit.misses, 1);
        assert_eq!(after_hit.hits, 1);
        assert!(after_hit.hit_rate() > 0.0);
    }

    /// Auto-tuning for small, medium and large PostgreSQL workloads.
    #[test]
    fn test_batch_config_auto_tune() {
        // Small dataset: the whole input fits in one batch.
        let small = BatchConfig::auto_tune(DatabaseType::PostgreSQL, 100, 50);
        assert_eq!(small.batch_size, 50);

        // Medium dataset: batch size stays within [100, 5000].
        let medium = BatchConfig::auto_tune(DatabaseType::PostgreSQL, 500, 5000);
        assert!(medium.batch_size >= 100);
        assert!(medium.batch_size <= 5000);

        // Large dataset: COPY is expected for large PostgreSQL imports.
        let large = BatchConfig::auto_tune(DatabaseType::PostgreSQL, 200, 100_000);
        assert!(large.use_copy);
        assert!(large.batch_size >= 100);
    }

    /// 250 rows in batches of 100 give two full ranges and one partial range.
    #[test]
    fn test_batch_ranges() {
        let config = BatchConfig {
            batch_size: 100,
            ..Default::default()
        };

        let ranges: Vec<_> = config.batch_ranges(250).collect();

        assert_eq!(ranges.len(), 3);
        let expected = [(0, 100), (100, 200), (200, 250)];
        for (index, bounds) in expected.iter().enumerate() {
            assert_eq!(ranges[index], *bounds);
        }
    }

    /// Every stage added through the builder shows up in the built pipeline.
    #[test]
    fn test_mongo_pipeline_builder() {
        let builder = MongoPipelineBuilder::new()
            .match_stage(r#"{ "status": "active" }"#)
            .lookup("orders", "user_id", "_id", "user_orders")
            .unwind("$user_orders")
            .group(r#""$user_id""#, r#""total": { "$sum": "$amount" }"#)
            .sort(r#"{ "total": -1 }"#)
            .limit(10);
        let rendered = builder.build();

        assert!(rendered.contains("$match"));
        assert!(rendered.contains("$lookup"));
        assert!(rendered.contains("$unwind"));
        assert!(rendered.contains("$group"));
        assert!(rendered.contains("$sort"));
        assert!(rendered.contains("$limit"));
    }

    /// PostgreSQL hints are rendered as settings in the SQL prefix.
    #[test]
    fn test_query_hints_postgres() {
        let rendered = QueryHints::new()
            .no_seq_scan()
            .parallel(4)
            .timeout(5000)
            .to_sql_prefix(DatabaseType::PostgreSQL);

        assert!(rendered.contains("enable_seqscan = off"));
        assert!(rendered.contains("max_parallel_workers_per_gather = 4"));
        assert!(rendered.contains("statement_timeout = 5000"));
    }

    /// MSSQL hints are rendered in the SQL suffix.
    #[test]
    fn test_query_hints_mssql() {
        let joined_tables = vec!["users".to_string(), "orders".to_string()];
        let rendered = QueryHints::new()
            .parallel(2)
            .hash_join(joined_tables)
            .to_sql_suffix(DatabaseType::MSSQL);

        assert!(rendered.contains("MAXDOP 2"));
        assert!(rendered.contains("HASH JOIN"));
    }

    /// Applying hints keeps the query text and adds the rendered settings.
    #[test]
    fn test_query_hints_apply() {
        let sql = "SELECT * FROM users WHERE id = $1";
        let hinted = QueryHints::new()
            .no_seq_scan()
            .apply_to_query(sql, DatabaseType::PostgreSQL);

        assert!(hinted.contains("enable_seqscan = off"));
        assert!(hinted.contains("SELECT * FROM users"));
    }
}
prax_query/db_optimize.rs

prax_query/
db_optimize.rs