Skip to main content

xlog_stats/
manager.rs

//! Statistics manager for GPU-resident relation metadata.
//!
//! This module provides the [`StatsManager`] type, which maintains statistics for all
//! GPU-resident relations and their join selectivities. It is the central coordination
//! point for optimizer cost models and solver heuristics.
6
7use std::collections::HashMap;
8use xlog_core::RelId;
9
10use crate::stats::{ColumnStats, JoinSelectivity, RelationStats};
11
12/// Serializable snapshot of collected statistics.
13///
14/// This is intended for feeding runtime observations back into the compiler/optimizer.
15#[derive(Debug, Clone, Default)]
16pub struct StatsSnapshot {
17    pub relations: Vec<RelationStats>,
18    pub join_selectivities: Vec<JoinSelectivity>,
19    /// Optional mapping from runtime `RelId` to predicate name.
20    ///
21    /// When present, consumers should prefer this over raw `RelId` matching to avoid
22    /// misapplying statistics across different programs where `RelId`s may be reused.
23    pub rel_names: Vec<(RelId, String)>,
24}
25
26/// Manages GPU-resident statistics for all relations.
27///
28/// The `StatsManager` is the central repository for relation statistics and join
29/// selectivity information. It provides methods for:
30///
31/// - Registering new relations and tracking their statistics
32/// - Updating cardinality and access patterns
33/// - Estimating join cardinalities using cached selectivity data
34/// - Managing relation "heat" for LRU-style eviction
35///
36/// # Thread Safety
37///
38/// This type is not thread-safe. For concurrent access, wrap in appropriate
39/// synchronization primitives (e.g., `RwLock`).
40///
41/// # Example
42///
43/// ```ignore
44/// use xlog_stats::StatsManager;
45/// use xlog_core::RelId;
46///
47/// let mut mgr = StatsManager::new();
48///
49/// // Register relations
50/// mgr.register_relation(RelId(1));
51/// mgr.register_relation(RelId(2));
52///
53/// // Update statistics
54/// mgr.update_cardinality(RelId(1), 10_000);
55/// mgr.update_cardinality(RelId(2), 5_000);
56///
57/// // Estimate join cardinality
58/// let estimate = mgr.estimate_join_cardinality(RelId(1), RelId(2), &[0], &[0]);
59/// ```
60#[derive(Debug, Default)]
61pub struct StatsManager {
62    /// Per-relation statistics indexed by RelId
63    relations: HashMap<RelId, RelationStats>,
64    /// Join selectivity cache indexed by (smaller_rel_id, larger_rel_id) for canonical ordering
65    join_selectivities: HashMap<(RelId, RelId), JoinSelectivity>,
66}
67
68impl StatsManager {
69    /// Creates a new empty statistics manager.
70    ///
71    /// # Returns
72    ///
73    /// A new `StatsManager` with no registered relations.
74    pub fn new() -> Self {
75        Self::default()
76    }
77
78    /// Registers a new relation for statistics tracking.
79    ///
80    /// If the relation is already registered, this is a no-op.
81    ///
82    /// # Arguments
83    ///
84    /// * `rel_id` - The unique identifier for the relation
85    pub fn register_relation(&mut self, rel_id: RelId) {
86        self.relations
87            .entry(rel_id)
88            .or_insert_with(|| RelationStats::new(rel_id));
89    }
90
91    /// Create a snapshot of all currently tracked statistics.
92    pub fn snapshot(&self) -> StatsSnapshot {
93        StatsSnapshot {
94            relations: self.relations.values().cloned().collect(),
95            join_selectivities: self.join_selectivities.values().cloned().collect(),
96            rel_names: Vec::new(),
97        }
98    }
99
100    /// Merge a previously captured snapshot into this manager.
101    ///
102    /// Existing entries are overwritten with the snapshot values.
103    pub fn merge_snapshot(&mut self, snapshot: &StatsSnapshot) {
104        for rel in &snapshot.relations {
105            self.register_relation(rel.rel_id);
106            if let Some(stats) = self.relations.get_mut(&rel.rel_id) {
107                *stats = rel.clone();
108            }
109        }
110
111        for js in &snapshot.join_selectivities {
112            self.set_join_selectivity(
113                js.left_rel,
114                js.right_rel,
115                js.left_keys.clone(),
116                js.right_keys.clone(),
117                js.selectivity,
118            );
119        }
120    }
121
122    /// Unregisters a relation, removing all associated statistics.
123    ///
124    /// Also removes any join selectivity entries involving this relation.
125    ///
126    /// # Arguments
127    ///
128    /// * `rel_id` - The relation to unregister
129    ///
130    /// # Returns
131    ///
132    /// The removed statistics if the relation was registered
133    pub fn unregister_relation(&mut self, rel_id: RelId) -> Option<RelationStats> {
134        // Remove join selectivities involving this relation
135        self.join_selectivities
136            .retain(|(left, right), _| *left != rel_id && *right != rel_id);
137
138        self.relations.remove(&rel_id)
139    }
140
141    /// Gets immutable reference to relation statistics.
142    ///
143    /// # Arguments
144    ///
145    /// * `rel_id` - The relation to look up
146    ///
147    /// # Returns
148    ///
149    /// A reference to the statistics if the relation is registered
150    pub fn get_relation_stats(&self, rel_id: RelId) -> Option<&RelationStats> {
151        self.relations.get(&rel_id)
152    }
153
154    /// Gets mutable reference to relation statistics.
155    ///
156    /// # Arguments
157    ///
158    /// * `rel_id` - The relation to look up
159    ///
160    /// # Returns
161    ///
162    /// A mutable reference to the statistics if the relation is registered
163    pub fn get_relation_stats_mut(&mut self, rel_id: RelId) -> Option<&mut RelationStats> {
164        self.relations.get_mut(&rel_id)
165    }
166
167    /// Updates the cardinality (row count) for a relation.
168    ///
169    /// If the relation is not registered, this is a no-op.
170    ///
171    /// # Arguments
172    ///
173    /// * `rel_id` - The relation to update
174    /// * `rows` - The new cardinality estimate
175    pub fn update_cardinality(&mut self, rel_id: RelId, rows: u64) {
176        if let Some(stats) = self.relations.get_mut(&rel_id) {
177            stats.update_cardinality(rows);
178        }
179    }
180
181    /// Updates the byte size estimate for a relation.
182    ///
183    /// If the relation is not registered, this is a no-op.
184    ///
185    /// # Arguments
186    ///
187    /// * `rel_id` - The relation to update
188    /// * `bytes` - The estimated total size in bytes
189    pub fn update_byte_size(&mut self, rel_id: RelId, bytes: u64) {
190        if let Some(stats) = self.relations.get_mut(&rel_id) {
191            stats.update_byte_size(bytes);
192        }
193    }
194
195    /// Records an access to a relation, updating its heat and timestamp.
196    ///
197    /// If the relation is not registered, this is a no-op.
198    ///
199    /// # Arguments
200    ///
201    /// * `rel_id` - The relation that was accessed
202    pub fn record_access(&mut self, rel_id: RelId) {
203        if let Some(stats) = self.relations.get_mut(&rel_id) {
204            stats.record_access();
205        }
206    }
207
208    /// Adds column statistics to a relation.
209    ///
210    /// If the relation is not registered, this is a no-op.
211    ///
212    /// # Arguments
213    ///
214    /// * `rel_id` - The relation to update
215    /// * `col_stats` - The column statistics to add
216    pub fn add_column_stats(&mut self, rel_id: RelId, col_stats: ColumnStats) {
217        if let Some(stats) = self.relations.get_mut(&rel_id) {
218            stats.add_column(col_stats);
219        }
220    }
221
222    /// Estimates the output cardinality for a join between two relations.
223    ///
224    /// Uses cached selectivity if available, otherwise uses a default heuristic.
225    /// The estimation formula is: `left_card * right_card * selectivity`.
226    ///
227    /// # Arguments
228    ///
229    /// * `left_rel` - The left relation in the join
230    /// * `right_rel` - The right relation in the join
231    /// * `left_keys` - Column indices used as join keys on the left (currently for future use)
232    /// * `right_keys` - Column indices used as join keys on the right (currently for future use)
233    ///
234    /// # Returns
235    ///
236    /// The estimated output cardinality (minimum of 1)
237    pub fn estimate_join_cardinality(
238        &self,
239        left_rel: RelId,
240        right_rel: RelId,
241        left_keys: &[usize],
242        right_keys: &[usize],
243    ) -> u64 {
244        // Get cardinalities with sensible defaults
245        let left_card = self
246            .relations
247            .get(&left_rel)
248            .map(|s| s.cardinality)
249            .unwrap_or(1000);
250        let right_card = self
251            .relations
252            .get(&right_rel)
253            .map(|s| s.cardinality)
254            .unwrap_or(1000);
255
256        // Use canonical key ordering for selectivity lookup
257        let key = Self::canonical_join_key(left_rel, right_rel);
258
259        // Try to use cached selectivity
260        if let Some(js) = self.join_selectivities.get(&key) {
261            return js.estimate_output_rows(left_card, right_card);
262        }
263
264        // Try to estimate from column statistics
265        if !left_keys.is_empty() && !right_keys.is_empty() {
266            if let (Some(left_stats), Some(right_stats)) = (
267                self.relations.get(&left_rel),
268                self.relations.get(&right_rel),
269            ) {
270                // Use first key column for selectivity estimation
271                let left_distinct = left_stats
272                    .get_column(left_keys[0])
273                    .map(|c| c.distinct_estimate)
274                    .unwrap_or(0);
275                let right_distinct = right_stats
276                    .get_column(right_keys[0])
277                    .map(|c| c.distinct_estimate)
278                    .unwrap_or(0);
279
280                if left_distinct > 0 && right_distinct > 0 {
281                    let selectivity = JoinSelectivity::estimate_selectivity_from_stats(
282                        left_distinct,
283                        right_distinct,
284                    );
285                    return ((left_card as f64 * right_card as f64 * selectivity) as u64).max(1);
286                }
287            }
288        }
289
290        // Default: assume 10% selectivity as a reasonable heuristic
291        // This is conservative and avoids underestimating join sizes
292        let default_selectivity = 0.1;
293        ((left_card as f64 * right_card as f64 * default_selectivity) as u64).max(1)
294    }
295
296    /// Records the result of a join execution to improve future estimates.
297    ///
298    /// Updates the selectivity model using exponential moving average:
299    /// `new_selectivity = old_selectivity * 0.7 + observed_selectivity * 0.3`
300    ///
301    /// # Arguments
302    ///
303    /// * `left_rel` - The left relation in the join
304    /// * `right_rel` - The right relation in the join
305    /// * `left_keys` - Column indices used as join keys on the left
306    /// * `right_keys` - Column indices used as join keys on the right
307    /// * `input_rows` - Product of input relation cardinalities
308    /// * `output_rows` - Actual output row count
309    pub fn record_join_result(
310        &mut self,
311        left_rel: RelId,
312        right_rel: RelId,
313        left_keys: Vec<usize>,
314        right_keys: Vec<usize>,
315        input_rows: u64,
316        output_rows: u64,
317    ) {
318        let key = Self::canonical_join_key(left_rel, right_rel);
319
320        // Compute observed selectivity
321        let observed_selectivity = if input_rows > 0 {
322            (output_rows as f64 / input_rows as f64).clamp(0.0, 1.0)
323        } else {
324            0.1 // Default when no input
325        };
326
327        // Update or create the selectivity entry
328        let entry = self.join_selectivities.entry(key).or_insert_with(|| {
329            let (canonical_left, canonical_right) = key;
330            JoinSelectivity::new(canonical_left, canonical_right)
331        });
332
333        // Update keys (store in canonical order)
334        let (keys_left, keys_right) = if left_rel <= right_rel {
335            (left_keys, right_keys)
336        } else {
337            (right_keys, left_keys)
338        };
339        entry.left_keys = keys_left;
340        entry.right_keys = keys_right;
341
342        // Exponential moving average for selectivity
343        const EMA_OLD_WEIGHT: f64 = 0.7;
344        const EMA_NEW_WEIGHT: f64 = 0.3;
345        entry.selectivity =
346            entry.selectivity * EMA_OLD_WEIGHT + observed_selectivity * EMA_NEW_WEIGHT;
347    }
348
349    /// Set (or overwrite) the join selectivity between two relations.
350    ///
351    /// This is useful for seeding the optimizer from external observations (e.g., runtime stats).
352    pub fn set_join_selectivity(
353        &mut self,
354        left_rel: RelId,
355        right_rel: RelId,
356        left_keys: Vec<usize>,
357        right_keys: Vec<usize>,
358        selectivity: f64,
359    ) {
360        let key = Self::canonical_join_key(left_rel, right_rel);
361        let entry = self.join_selectivities.entry(key).or_insert_with(|| {
362            let (canonical_left, canonical_right) = key;
363            JoinSelectivity::new(canonical_left, canonical_right)
364        });
365
366        // Store keys in canonical order.
367        let (keys_left, keys_right) = if left_rel <= right_rel {
368            (left_keys, right_keys)
369        } else {
370            (right_keys, left_keys)
371        };
372        entry.set_keys(keys_left, keys_right);
373        entry.set_selectivity(selectivity);
374    }
375
376    /// Gets the cached join selectivity between two relations.
377    ///
378    /// # Arguments
379    ///
380    /// * `left_rel` - One relation in the join
381    /// * `right_rel` - The other relation in the join
382    ///
383    /// # Returns
384    ///
385    /// A reference to the cached selectivity if present
386    pub fn get_join_selectivity(
387        &self,
388        left_rel: RelId,
389        right_rel: RelId,
390    ) -> Option<&JoinSelectivity> {
391        let key = Self::canonical_join_key(left_rel, right_rel);
392        self.join_selectivities.get(&key)
393    }
394
395    /// Decays the heat of all relations by a multiplicative factor.
396    ///
397    /// This should be called periodically (e.g., during garbage collection
398    /// or memory pressure events) to allow unused relations to cool down.
399    ///
400    /// # Arguments
401    ///
402    /// * `factor` - Multiplicative decay factor (typically 0.0 to 1.0)
403    pub fn decay_all_heat(&mut self, factor: f32) {
404        for stats in self.relations.values_mut() {
405            stats.decay_heat(factor);
406        }
407    }
408
409    /// Returns the IDs of all "hot" relations above a given heat threshold.
410    ///
411    /// This is useful for identifying frequently accessed relations that should
412    /// be kept in GPU memory.
413    ///
414    /// # Arguments
415    ///
416    /// * `threshold` - The minimum heat value to be considered "hot"
417    ///
418    /// # Returns
419    ///
420    /// A vector of RelIds for all relations with heat >= threshold
421    pub fn hot_relations(&self, threshold: f32) -> Vec<RelId> {
422        self.relations
423            .iter()
424            .filter(|(_, s)| s.heat >= threshold)
425            .map(|(id, _)| *id)
426            .collect()
427    }
428
429    /// Returns the IDs of all "cold" relations below a given heat threshold.
430    ///
431    /// This is useful for identifying candidates for eviction from GPU memory.
432    ///
433    /// # Arguments
434    ///
435    /// * `threshold` - The maximum heat value to be considered "cold"
436    ///
437    /// # Returns
438    ///
439    /// A vector of RelIds for all relations with heat < threshold
440    pub fn cold_relations(&self, threshold: f32) -> Vec<RelId> {
441        self.relations
442            .iter()
443            .filter(|(_, s)| s.heat < threshold)
444            .map(|(id, _)| *id)
445            .collect()
446    }
447
448    /// Returns the total number of registered relations.
449    pub fn relation_count(&self) -> usize {
450        self.relations.len()
451    }
452
453    /// Returns an iterator over all registered relation IDs.
454    pub fn relation_ids(&self) -> impl Iterator<Item = RelId> + '_ {
455        self.relations.keys().copied()
456    }
457
458    /// Returns the total estimated bytes across all relations.
459    pub fn total_byte_size(&self) -> u64 {
460        self.relations.values().map(|s| s.byte_size).sum()
461    }
462
463    /// Returns the total cardinality across all relations.
464    pub fn total_cardinality(&self) -> u64 {
465        self.relations.values().map(|s| s.cardinality).sum()
466    }
467
468    /// Clears all statistics.
469    ///
470    /// Removes all relation statistics and join selectivities.
471    pub fn clear(&mut self) {
472        self.relations.clear();
473        self.join_selectivities.clear();
474    }
475
476    /// Returns canonical key for join selectivity lookup.
477    ///
478    /// Ensures (smaller_id, larger_id) ordering for consistent lookups.
479    #[inline]
480    fn canonical_join_key(left: RelId, right: RelId) -> (RelId, RelId) {
481        if left <= right {
482            (left, right)
483        } else {
484            (right, left)
485        }
486    }
487}
488
489#[cfg(test)]
490mod tests {
491    use super::*;
492    use xlog_core::ScalarType;
493
494    #[test]
495    fn test_stats_manager_new() {
496        let mgr = StatsManager::new();
497        assert!(mgr.get_relation_stats(RelId(1)).is_none());
498        assert_eq!(mgr.relation_count(), 0);
499    }
500
501    #[test]
502    fn test_stats_manager_default() {
503        let mgr = StatsManager::default();
504        assert_eq!(mgr.relation_count(), 0);
505        assert!(mgr.get_relation_stats(RelId(42)).is_none());
506    }
507
508    #[test]
509    fn test_stats_manager_register_relation() {
510        let mut mgr = StatsManager::new();
511        mgr.register_relation(RelId(1));
512        assert!(mgr.get_relation_stats(RelId(1)).is_some());
513        assert_eq!(mgr.relation_count(), 1);
514    }
515
516    #[test]
517    fn test_stats_manager_register_relation_idempotent() {
518        let mut mgr = StatsManager::new();
519        mgr.register_relation(RelId(1));
520        mgr.update_cardinality(RelId(1), 500);
521        mgr.register_relation(RelId(1)); // Should not reset stats
522        let stats = mgr.get_relation_stats(RelId(1)).unwrap();
523        assert_eq!(stats.cardinality, 500);
524    }
525
526    #[test]
527    fn test_stats_manager_register_multiple_relations() {
528        let mut mgr = StatsManager::new();
529        for i in 1..=10 {
530            mgr.register_relation(RelId(i));
531        }
532        assert_eq!(mgr.relation_count(), 10);
533        for i in 1..=10 {
534            assert!(mgr.get_relation_stats(RelId(i)).is_some());
535        }
536    }
537
538    #[test]
539    fn test_stats_manager_unregister_relation() {
540        let mut mgr = StatsManager::new();
541        mgr.register_relation(RelId(1));
542        mgr.update_cardinality(RelId(1), 1000);
543
544        let removed = mgr.unregister_relation(RelId(1));
545        assert!(removed.is_some());
546        assert_eq!(removed.unwrap().cardinality, 1000);
547        assert!(mgr.get_relation_stats(RelId(1)).is_none());
548        assert_eq!(mgr.relation_count(), 0);
549    }
550
551    #[test]
552    fn test_stats_manager_unregister_removes_join_selectivities() {
553        let mut mgr = StatsManager::new();
554        mgr.register_relation(RelId(1));
555        mgr.register_relation(RelId(2));
556        mgr.register_relation(RelId(3));
557        mgr.update_cardinality(RelId(1), 1000);
558        mgr.update_cardinality(RelId(2), 500);
559        mgr.update_cardinality(RelId(3), 200);
560
561        // Record join results to create selectivity entries
562        mgr.record_join_result(RelId(1), RelId(2), vec![0], vec![0], 500_000, 1000);
563        mgr.record_join_result(RelId(1), RelId(3), vec![0], vec![0], 200_000, 500);
564        mgr.record_join_result(RelId(2), RelId(3), vec![0], vec![0], 100_000, 250);
565
566        assert!(mgr.get_join_selectivity(RelId(1), RelId(2)).is_some());
567        assert!(mgr.get_join_selectivity(RelId(1), RelId(3)).is_some());
568        assert!(mgr.get_join_selectivity(RelId(2), RelId(3)).is_some());
569
570        // Unregister relation 1 - should remove join selectivities with 1
571        mgr.unregister_relation(RelId(1));
572
573        assert!(mgr.get_join_selectivity(RelId(1), RelId(2)).is_none());
574        assert!(mgr.get_join_selectivity(RelId(1), RelId(3)).is_none());
575        // Join between 2 and 3 should still exist
576        assert!(mgr.get_join_selectivity(RelId(2), RelId(3)).is_some());
577    }
578
579    #[test]
580    fn test_stats_manager_update_cardinality() {
581        let mut mgr = StatsManager::new();
582        mgr.register_relation(RelId(1));
583        mgr.update_cardinality(RelId(1), 5000);
584        let stats = mgr.get_relation_stats(RelId(1)).unwrap();
585        assert_eq!(stats.cardinality, 5000);
586    }
587
588    #[test]
589    fn test_stats_manager_update_cardinality_unregistered() {
590        let mut mgr = StatsManager::new();
591        // Should be a no-op for unregistered relation
592        mgr.update_cardinality(RelId(1), 5000);
593        assert!(mgr.get_relation_stats(RelId(1)).is_none());
594    }
595
596    #[test]
597    fn test_stats_manager_update_byte_size() {
598        let mut mgr = StatsManager::new();
599        mgr.register_relation(RelId(1));
600        mgr.update_byte_size(RelId(1), 1024 * 1024);
601        let stats = mgr.get_relation_stats(RelId(1)).unwrap();
602        assert_eq!(stats.byte_size, 1024 * 1024);
603    }
604
605    #[test]
606    fn test_stats_manager_record_access() {
607        let mut mgr = StatsManager::new();
608        mgr.register_relation(RelId(1));
609        mgr.record_access(RelId(1));
610        let stats = mgr.get_relation_stats(RelId(1)).unwrap();
611        assert!(stats.heat > 0.0);
612        assert!(stats.last_access > 0);
613    }
614
615    #[test]
616    fn test_stats_manager_record_access_multiple() {
617        let mut mgr = StatsManager::new();
618        mgr.register_relation(RelId(1));
619
620        for _ in 0..10 {
621            mgr.record_access(RelId(1));
622        }
623
624        let stats = mgr.get_relation_stats(RelId(1)).unwrap();
625        // After 10 accesses, heat should be quite high
626        assert!(stats.heat > 0.5);
627    }
628
629    #[test]
630    fn test_stats_manager_add_column_stats() {
631        let mut mgr = StatsManager::new();
632        mgr.register_relation(RelId(1));
633
634        let mut col_stats = ColumnStats::new(0, ScalarType::I64);
635        col_stats.update_distinct(100);
636        col_stats.update_range(0, 1000);
637        mgr.add_column_stats(RelId(1), col_stats);
638
639        let stats = mgr.get_relation_stats(RelId(1)).unwrap();
640        assert_eq!(stats.column_stats.len(), 1);
641        let col = stats.get_column(0).unwrap();
642        assert_eq!(col.distinct_estimate, 100);
643    }
644
645    #[test]
646    fn test_stats_manager_estimate_join() {
647        let mut mgr = StatsManager::new();
648        mgr.register_relation(RelId(1));
649        mgr.register_relation(RelId(2));
650        mgr.update_cardinality(RelId(1), 1000);
651        mgr.update_cardinality(RelId(2), 500);
652
653        let estimate = mgr.estimate_join_cardinality(RelId(1), RelId(2), &[0], &[0]);
654        // Default selectivity assumes 10%: 1000 * 500 * 0.1 = 50000
655        assert!(estimate > 0);
656        assert!(estimate <= 1000 * 500);
657    }
658
659    #[test]
660    fn test_stats_manager_estimate_join_with_column_stats() {
661        let mut mgr = StatsManager::new();
662        mgr.register_relation(RelId(1));
663        mgr.register_relation(RelId(2));
664        mgr.update_cardinality(RelId(1), 1000);
665        mgr.update_cardinality(RelId(2), 500);
666
667        // Add column stats with distinct values
668        let mut col_stats1 = ColumnStats::new(0, ScalarType::I64);
669        col_stats1.update_distinct(100);
670        mgr.add_column_stats(RelId(1), col_stats1);
671
672        let mut col_stats2 = ColumnStats::new(0, ScalarType::I64);
673        col_stats2.update_distinct(50);
674        mgr.add_column_stats(RelId(2), col_stats2);
675
676        let estimate = mgr.estimate_join_cardinality(RelId(1), RelId(2), &[0], &[0]);
677        // Selectivity = 1/max(100, 50) = 0.01
678        // Expected: 1000 * 500 * 0.01 = 5000
679        assert_eq!(estimate, 5000);
680    }
681
682    #[test]
683    fn test_stats_manager_estimate_join_unregistered() {
684        let mgr = StatsManager::new();
685        // Should use default cardinality of 1000 for unregistered relations
686        let estimate = mgr.estimate_join_cardinality(RelId(1), RelId(2), &[0], &[0]);
687        // 1000 * 1000 * 0.1 = 100000
688        assert_eq!(estimate, 100_000);
689    }
690
691    #[test]
692    fn test_stats_manager_estimate_join_minimum_one() {
693        let mut mgr = StatsManager::new();
694        mgr.register_relation(RelId(1));
695        mgr.register_relation(RelId(2));
696        mgr.update_cardinality(RelId(1), 1);
697        mgr.update_cardinality(RelId(2), 1);
698
699        // Add column stats with high distinct count to make selectivity very low
700        let mut col_stats1 = ColumnStats::new(0, ScalarType::I64);
701        col_stats1.update_distinct(1_000_000);
702        mgr.add_column_stats(RelId(1), col_stats1);
703
704        let mut col_stats2 = ColumnStats::new(0, ScalarType::I64);
705        col_stats2.update_distinct(1_000_000);
706        mgr.add_column_stats(RelId(2), col_stats2);
707
708        let estimate = mgr.estimate_join_cardinality(RelId(1), RelId(2), &[0], &[0]);
709        // Should be at least 1
710        assert!(estimate >= 1);
711    }
712
713    #[test]
714    fn test_stats_manager_record_join_result() {
715        let mut mgr = StatsManager::new();
716        mgr.register_relation(RelId(1));
717        mgr.register_relation(RelId(2));
718        mgr.update_cardinality(RelId(1), 1000);
719        mgr.update_cardinality(RelId(2), 500);
720
721        // Record a join result
722        mgr.record_join_result(RelId(1), RelId(2), vec![0], vec![0], 500_000, 1000);
723
724        // Should have created a selectivity entry
725        let js = mgr.get_join_selectivity(RelId(1), RelId(2)).unwrap();
726        assert_eq!(js.left_keys, vec![0]);
727        assert_eq!(js.right_keys, vec![0]);
728        // Observed selectivity: 1000/500000 = 0.002
729        // EMA: 1.0 * 0.7 + 0.002 * 0.3 ≈ 0.7006
730        assert!(js.selectivity < 1.0);
731    }
732
733    #[test]
734    fn test_stats_manager_record_join_result_canonical_order() {
735        let mut mgr = StatsManager::new();
736        mgr.register_relation(RelId(1));
737        mgr.register_relation(RelId(2));
738
739        // Record with reverse order - should use canonical key
740        mgr.record_join_result(RelId(2), RelId(1), vec![0], vec![1], 1000, 100);
741
742        // Should be able to look up with either order
743        assert!(mgr.get_join_selectivity(RelId(1), RelId(2)).is_some());
744        assert!(mgr.get_join_selectivity(RelId(2), RelId(1)).is_some());
745
746        // Both lookups should return the same entry
747        let js1 = mgr.get_join_selectivity(RelId(1), RelId(2)).unwrap();
748        let js2 = mgr.get_join_selectivity(RelId(2), RelId(1)).unwrap();
749        assert_eq!(js1.selectivity, js2.selectivity);
750    }
751
752    #[test]
753    fn test_stats_manager_record_join_result_ema_update() {
754        let mut mgr = StatsManager::new();
755        mgr.register_relation(RelId(1));
756        mgr.register_relation(RelId(2));
757
758        // First observation
759        mgr.record_join_result(RelId(1), RelId(2), vec![0], vec![0], 1000, 100);
760        let sel1 = mgr
761            .get_join_selectivity(RelId(1), RelId(2))
762            .unwrap()
763            .selectivity;
764
765        // Second observation with different selectivity
766        mgr.record_join_result(RelId(1), RelId(2), vec![0], vec![0], 1000, 500);
767        let sel2 = mgr
768            .get_join_selectivity(RelId(1), RelId(2))
769            .unwrap()
770            .selectivity;
771
772        // Selectivity should have moved via EMA
773        assert!(sel2 != sel1);
774    }
775
776    #[test]
777    fn test_stats_manager_decay_all_heat() {
778        let mut mgr = StatsManager::new();
779        mgr.register_relation(RelId(1));
780        mgr.register_relation(RelId(2));
781
782        // Heat up relations
783        for _ in 0..10 {
784            mgr.record_access(RelId(1));
785            mgr.record_access(RelId(2));
786        }
787
788        let heat1_before = mgr.get_relation_stats(RelId(1)).unwrap().heat;
789        let heat2_before = mgr.get_relation_stats(RelId(2)).unwrap().heat;
790
791        mgr.decay_all_heat(0.5);
792
793        let heat1_after = mgr.get_relation_stats(RelId(1)).unwrap().heat;
794        let heat2_after = mgr.get_relation_stats(RelId(2)).unwrap().heat;
795
796        assert!((heat1_after - heat1_before * 0.5).abs() < 0.001);
797        assert!((heat2_after - heat2_before * 0.5).abs() < 0.001);
798    }
799
800    #[test]
801    fn test_stats_manager_hot_relations() {
802        let mut mgr = StatsManager::new();
803        mgr.register_relation(RelId(1));
804        mgr.register_relation(RelId(2));
805        mgr.register_relation(RelId(3));
806
807        // Heat up only relation 1
808        for _ in 0..20 {
809            mgr.record_access(RelId(1));
810        }
811
812        let hot = mgr.hot_relations(0.5);
813        assert_eq!(hot.len(), 1);
814        assert_eq!(hot[0], RelId(1));
815    }
816
817    #[test]
818    fn test_stats_manager_cold_relations() {
819        let mut mgr = StatsManager::new();
820        mgr.register_relation(RelId(1));
821        mgr.register_relation(RelId(2));
822        mgr.register_relation(RelId(3));
823
824        // Heat up only relation 1
825        for _ in 0..20 {
826            mgr.record_access(RelId(1));
827        }
828
829        let cold = mgr.cold_relations(0.5);
830        // Relations 2 and 3 should be cold
831        assert_eq!(cold.len(), 2);
832        assert!(cold.contains(&RelId(2)));
833        assert!(cold.contains(&RelId(3)));
834    }
835
836    #[test]
837    fn test_stats_manager_relation_ids() {
838        let mut mgr = StatsManager::new();
839        mgr.register_relation(RelId(5));
840        mgr.register_relation(RelId(10));
841        mgr.register_relation(RelId(15));
842
843        let ids: Vec<_> = mgr.relation_ids().collect();
844        assert_eq!(ids.len(), 3);
845        assert!(ids.contains(&RelId(5)));
846        assert!(ids.contains(&RelId(10)));
847        assert!(ids.contains(&RelId(15)));
848    }
849
850    #[test]
851    fn test_stats_manager_total_byte_size() {
852        let mut mgr = StatsManager::new();
853        mgr.register_relation(RelId(1));
854        mgr.register_relation(RelId(2));
855        mgr.update_byte_size(RelId(1), 1000);
856        mgr.update_byte_size(RelId(2), 2000);
857
858        assert_eq!(mgr.total_byte_size(), 3000);
859    }
860
861    #[test]
862    fn test_stats_manager_total_cardinality() {
863        let mut mgr = StatsManager::new();
864        mgr.register_relation(RelId(1));
865        mgr.register_relation(RelId(2));
866        mgr.update_cardinality(RelId(1), 1000);
867        mgr.update_cardinality(RelId(2), 2000);
868
869        assert_eq!(mgr.total_cardinality(), 3000);
870    }
871
872    #[test]
873    fn test_stats_manager_clear() {
874        let mut mgr = StatsManager::new();
875        mgr.register_relation(RelId(1));
876        mgr.register_relation(RelId(2));
877        mgr.record_join_result(RelId(1), RelId(2), vec![0], vec![0], 1000, 100);
878
879        mgr.clear();
880
881        assert_eq!(mgr.relation_count(), 0);
882        assert!(mgr.get_relation_stats(RelId(1)).is_none());
883        assert!(mgr.get_join_selectivity(RelId(1), RelId(2)).is_none());
884    }
885
886    #[test]
887    fn test_stats_manager_get_relation_stats_mut() {
888        let mut mgr = StatsManager::new();
889        mgr.register_relation(RelId(1));
890
891        if let Some(stats) = mgr.get_relation_stats_mut(RelId(1)) {
892            stats.update_cardinality(999);
893            stats.has_index = true;
894        }
895
896        let stats = mgr.get_relation_stats(RelId(1)).unwrap();
897        assert_eq!(stats.cardinality, 999);
898        assert!(stats.has_index);
899    }
900
901    #[test]
902    fn test_stats_manager_join_estimate_uses_cached_selectivity() {
903        let mut mgr = StatsManager::new();
904        mgr.register_relation(RelId(1));
905        mgr.register_relation(RelId(2));
906        mgr.update_cardinality(RelId(1), 1000);
907        mgr.update_cardinality(RelId(2), 500);
908
909        // Record a join with known selectivity
910        // Observed: 2500 / 500000 = 0.005
911        mgr.record_join_result(RelId(1), RelId(2), vec![0], vec![0], 500_000, 2500);
912
913        // Subsequent estimates should use the cached selectivity
914        let estimate = mgr.estimate_join_cardinality(RelId(1), RelId(2), &[0], &[0]);
915
916        // The cached selectivity is an EMA, initial 1.0 * 0.7 + 0.005 * 0.3 = 0.7015
917        // Estimate = 1000 * 500 * 0.7015 = 350750
918        let js = mgr.get_join_selectivity(RelId(1), RelId(2)).unwrap();
919        let expected = ((1000_f64 * 500_f64 * js.selectivity) as u64).max(1);
920        assert_eq!(estimate, expected);
921    }
922
923    #[test]
924    fn test_stats_manager_set_join_selectivity_canonicalizes_keys() {
925        let mut mgr = StatsManager::new();
926        mgr.register_relation(RelId(1));
927        mgr.register_relation(RelId(2));
928
929        // Set in reverse order; manager should store in canonical (1,2).
930        mgr.set_join_selectivity(RelId(2), RelId(1), vec![3], vec![7], 0.05);
931
932        let js = mgr.get_join_selectivity(RelId(1), RelId(2)).unwrap();
933        assert_eq!(js.left_rel, RelId(1));
934        assert_eq!(js.right_rel, RelId(2));
935        assert_eq!(js.left_keys, vec![7]);
936        assert_eq!(js.right_keys, vec![3]);
937        assert!((js.selectivity - 0.05).abs() < 1e-9);
938    }
939
940    #[test]
941    fn test_stats_manager_snapshot_and_merge() {
942        let mut mgr = StatsManager::new();
943        mgr.register_relation(RelId(1));
944        mgr.update_cardinality(RelId(1), 123);
945        mgr.record_access(RelId(1));
946        mgr.set_join_selectivity(RelId(1), RelId(2), vec![0], vec![0], 0.2);
947
948        let snap = mgr.snapshot();
949
950        let mut mgr2 = StatsManager::new();
951        mgr2.merge_snapshot(&snap);
952
953        let r1 = mgr2.get_relation_stats(RelId(1)).unwrap();
954        assert_eq!(r1.cardinality, 123);
955
956        let js = mgr2.get_join_selectivity(RelId(1), RelId(2)).unwrap();
957        assert_eq!(js.left_keys, vec![0]);
958        assert_eq!(js.right_keys, vec![0]);
959        assert!((js.selectivity - 0.2).abs() < 1e-9);
960    }
961
962    #[test]
963    fn test_canonical_join_key() {
964        assert_eq!(
965            StatsManager::canonical_join_key(RelId(1), RelId(2)),
966            (RelId(1), RelId(2))
967        );
968        assert_eq!(
969            StatsManager::canonical_join_key(RelId(2), RelId(1)),
970            (RelId(1), RelId(2))
971        );
972        assert_eq!(
973            StatsManager::canonical_join_key(RelId(5), RelId(5)),
974            (RelId(5), RelId(5))
975        );
976    }
977}