1use xlog_core::{RelId, ScalarType};
8
9#[derive(Debug, Clone)]
15pub struct RelationStats {
16 pub rel_id: RelId,
18 pub cardinality: u64,
20 pub byte_size: u64,
22 pub column_stats: Vec<ColumnStats>,
24 pub heat: f32,
26 pub last_access: u64,
28 pub has_index: bool,
30}
31
32impl RelationStats {
33 pub fn new(rel_id: RelId) -> Self {
41 Self {
42 rel_id,
43 cardinality: 0,
44 byte_size: 0,
45 column_stats: Vec::new(),
46 heat: 0.0,
47 last_access: 0,
48 has_index: false,
49 }
50 }
51
52 pub fn update_cardinality(&mut self, rows: u64) {
60 self.cardinality = rows;
61 }
62
63 pub fn update_byte_size(&mut self, bytes: u64) {
68 self.byte_size = bytes;
69 }
70
71 pub fn record_access(&mut self) {
79 self.heat = self.heat * 0.9 + 0.1;
81 self.last_access = std::time::SystemTime::now()
82 .duration_since(std::time::UNIX_EPOCH)
83 .unwrap_or_default()
84 .as_secs();
85 }
86
87 pub fn decay_heat(&mut self, factor: f32) {
95 self.heat *= factor;
96 }
97
98 pub fn add_column(&mut self, col_stats: ColumnStats) {
103 self.column_stats.push(col_stats);
104 }
105
106 pub fn get_column(&self, col_idx: usize) -> Option<&ColumnStats> {
114 self.column_stats.iter().find(|c| c.col_idx == col_idx)
115 }
116
117 pub fn get_column_mut(&mut self, col_idx: usize) -> Option<&mut ColumnStats> {
125 self.column_stats.iter_mut().find(|c| c.col_idx == col_idx)
126 }
127
128 pub fn estimate_selectivity(&self, estimated_matches: u64) -> f64 {
136 if self.cardinality == 0 {
137 return 1.0;
138 }
139 (estimated_matches as f64 / self.cardinality as f64).clamp(0.0, 1.0)
140 }
141}
142
143#[derive(Debug, Clone)]
149pub struct ColumnStats {
150 pub col_idx: usize,
152 pub dtype: ScalarType,
154 pub null_count: u64,
156 pub distinct_estimate: u64,
158 pub min_value: Option<i64>,
160 pub max_value: Option<i64>,
162 pub avg_width: Option<f32>,
164}
165
166impl ColumnStats {
167 pub fn new(col_idx: usize, dtype: ScalarType) -> Self {
176 Self {
177 col_idx,
178 dtype,
179 null_count: 0,
180 distinct_estimate: 0,
181 min_value: None,
182 max_value: None,
183 avg_width: None,
184 }
185 }
186
187 pub fn update_distinct(&mut self, estimate: u64) {
195 self.distinct_estimate = estimate;
196 }
197
198 pub fn update_range(&mut self, min: i64, max: i64) {
204 self.min_value = Some(min);
205 self.max_value = Some(max);
206 }
207
208 pub fn update_null_count(&mut self, count: u64) {
213 self.null_count = count;
214 }
215
216 pub fn update_avg_width(&mut self, width: f32) {
221 self.avg_width = Some(width);
222 }
223
224 pub fn equality_selectivity(&self, total_rows: u64) -> f64 {
235 if self.distinct_estimate == 0 || total_rows == 0 {
236 return 0.1;
238 }
239 1.0 / self.distinct_estimate as f64
240 }
241
242 pub fn range_selectivity(&self, low: i64, high: i64) -> f64 {
254 match (self.min_value, self.max_value) {
255 (Some(col_min), Some(col_max)) if col_max > col_min => {
256 let col_range = (col_max - col_min) as f64;
257 let effective_low = low.max(col_min);
258 let effective_high = high.min(col_max);
259 if effective_high < effective_low {
260 return 0.0;
261 }
262 let query_range = (effective_high - effective_low) as f64;
263 (query_range / col_range).clamp(0.0, 1.0)
264 }
265 _ => {
266 0.25
268 }
269 }
270 }
271
272 pub fn value_size_bytes(&self) -> usize {
274 self.dtype.size_bytes()
275 }
276}
277
278#[derive(Debug, Clone)]
284pub struct JoinSelectivity {
285 pub left_rel: RelId,
287 pub right_rel: RelId,
289 pub left_keys: Vec<usize>,
291 pub right_keys: Vec<usize>,
293 pub selectivity: f64,
295 pub is_pk_fk: bool,
297 cached_output_estimate: Option<u64>,
299}
300
301impl JoinSelectivity {
302 pub fn new(left_rel: RelId, right_rel: RelId) -> Self {
313 Self {
314 left_rel,
315 right_rel,
316 left_keys: Vec::new(),
317 right_keys: Vec::new(),
318 selectivity: 1.0,
319 is_pk_fk: false,
320 cached_output_estimate: None,
321 }
322 }
323
324 pub fn set_keys(&mut self, left_keys: Vec<usize>, right_keys: Vec<usize>) {
330 debug_assert_eq!(
331 left_keys.len(),
332 right_keys.len(),
333 "Join key counts must match"
334 );
335 self.left_keys = left_keys;
336 self.right_keys = right_keys;
337 self.cached_output_estimate = None;
338 }
339
340 pub fn set_selectivity(&mut self, selectivity: f64) {
345 self.selectivity = selectivity.clamp(0.0, 1.0);
346 self.cached_output_estimate = None;
347 }
348
349 pub fn mark_pk_fk(&mut self) {
354 self.is_pk_fk = true;
355 }
356
357 pub fn estimate_output_rows(&self, left_rows: u64, right_rows: u64) -> u64 {
369 if self.is_pk_fk {
370 return right_rows;
373 }
374 ((left_rows as f64 * right_rows as f64 * self.selectivity) as u64).max(1)
375 }
376
377 pub fn estimate_selectivity_from_stats(left_distinct: u64, right_distinct: u64) -> f64 {
389 if left_distinct == 0 || right_distinct == 0 {
390 return 1.0;
391 }
392 1.0 / left_distinct.max(right_distinct) as f64
393 }
394
395 pub fn update_from_observation(&mut self, left_rows: u64, right_rows: u64, output_rows: u64) {
404 let product = left_rows as f64 * right_rows as f64;
405 if product > 0.0 {
406 self.selectivity = (output_rows as f64 / product).clamp(0.0, 1.0);
407 self.cached_output_estimate = Some(output_rows);
408 }
409 }
410}
411
412#[cfg(test)]
413mod tests {
414 use super::*;
415
416 #[test]
417 fn test_relation_stats_new() {
418 let stats = RelationStats::new(RelId(1));
419 assert_eq!(stats.rel_id, RelId(1));
420 assert_eq!(stats.cardinality, 0);
421 assert_eq!(stats.heat, 0.0);
422 assert_eq!(stats.byte_size, 0);
423 assert!(stats.column_stats.is_empty());
424 assert!(!stats.has_index);
425 }
426
427 #[test]
428 fn test_relation_stats_update_cardinality() {
429 let mut stats = RelationStats::new(RelId(1));
430 stats.update_cardinality(1000);
431 assert_eq!(stats.cardinality, 1000);
432 }
433
434 #[test]
435 fn test_relation_stats_update_byte_size() {
436 let mut stats = RelationStats::new(RelId(1));
437 stats.update_byte_size(4096);
438 assert_eq!(stats.byte_size, 4096);
439 }
440
441 #[test]
442 fn test_relation_stats_update_heat() {
443 let mut stats = RelationStats::new(RelId(1));
444 assert_eq!(stats.heat, 0.0);
445
446 stats.record_access();
447 assert!(stats.heat > 0.0);
448 let heat_after_first = stats.heat;
449 assert!((heat_after_first - 0.1).abs() < 0.001);
450
451 stats.record_access();
452 assert!(stats.heat > heat_after_first);
453 assert!((stats.heat - 0.19).abs() < 0.001);
455
456 assert!(stats.last_access > 0);
458 }
459
460 #[test]
461 fn test_relation_stats_decay_heat() {
462 let mut stats = RelationStats::new(RelId(1));
463 stats.record_access();
464 stats.record_access();
465 let initial_heat = stats.heat;
466
467 stats.decay_heat(0.5);
468 assert!((stats.heat - initial_heat * 0.5).abs() < 0.001);
469 }
470
471 #[test]
472 fn test_relation_stats_column_management() {
473 let mut stats = RelationStats::new(RelId(1));
474 let col0 = ColumnStats::new(0, ScalarType::U32);
475 let col1 = ColumnStats::new(1, ScalarType::I64);
476
477 stats.add_column(col0);
478 stats.add_column(col1);
479
480 assert_eq!(stats.column_stats.len(), 2);
481 assert!(stats.get_column(0).is_some());
482 assert!(stats.get_column(1).is_some());
483 assert!(stats.get_column(2).is_none());
484
485 if let Some(col) = stats.get_column_mut(0) {
487 col.update_distinct(100);
488 }
489 assert_eq!(stats.get_column(0).unwrap().distinct_estimate, 100);
490 }
491
492 #[test]
493 fn test_relation_stats_estimate_selectivity() {
494 let mut stats = RelationStats::new(RelId(1));
495 stats.update_cardinality(1000);
496
497 let sel = stats.estimate_selectivity(100);
499 assert!((sel - 0.1).abs() < 0.001);
500
501 let empty_stats = RelationStats::new(RelId(2));
503 assert_eq!(empty_stats.estimate_selectivity(50), 1.0);
504 }
505
506 #[test]
507 fn test_column_stats_new() {
508 let col = ColumnStats::new(0, ScalarType::U32);
509 assert_eq!(col.col_idx, 0);
510 assert_eq!(col.dtype, ScalarType::U32);
511 assert_eq!(col.distinct_estimate, 0);
512 assert_eq!(col.null_count, 0);
513 assert!(col.min_value.is_none());
514 assert!(col.max_value.is_none());
515 assert!(col.avg_width.is_none());
516 }
517
518 #[test]
519 fn test_column_stats_update_distinct() {
520 let mut col = ColumnStats::new(0, ScalarType::U32);
521 col.update_distinct(500);
522 assert_eq!(col.distinct_estimate, 500);
523 }
524
525 #[test]
526 fn test_column_stats_update_range() {
527 let mut col = ColumnStats::new(0, ScalarType::I32);
528 col.update_range(-100, 100);
529 assert_eq!(col.min_value, Some(-100));
530 assert_eq!(col.max_value, Some(100));
531 }
532
533 #[test]
534 fn test_column_stats_update_null_count() {
535 let mut col = ColumnStats::new(0, ScalarType::U32);
536 col.update_null_count(42);
537 assert_eq!(col.null_count, 42);
538 }
539
540 #[test]
541 fn test_column_stats_update_avg_width() {
542 let mut col = ColumnStats::new(0, ScalarType::Symbol);
543 col.update_avg_width(12.5);
544 assert_eq!(col.avg_width, Some(12.5));
545 }
546
547 #[test]
548 fn test_column_stats_equality_selectivity() {
549 let mut col = ColumnStats::new(0, ScalarType::U32);
550 col.update_distinct(100);
551
552 let sel = col.equality_selectivity(1000);
553 assert!((sel - 0.01).abs() < 0.0001); let empty_col = ColumnStats::new(1, ScalarType::U32);
557 assert_eq!(empty_col.equality_selectivity(1000), 0.1); }
559
560 #[test]
561 fn test_column_stats_range_selectivity() {
562 let mut col = ColumnStats::new(0, ScalarType::I64);
563 col.update_range(0, 100);
564
565 let sel = col.range_selectivity(25, 75);
567 assert!((sel - 0.5).abs() < 0.001); let sel_outside = col.range_selectivity(200, 300);
571 assert_eq!(sel_outside, 0.0);
572
573 let sel_partial = col.range_selectivity(50, 150);
575 assert!((sel_partial - 0.5).abs() < 0.001); let empty_col = ColumnStats::new(1, ScalarType::I64);
579 assert_eq!(empty_col.range_selectivity(0, 100), 0.25); }
581
582 #[test]
583 fn test_column_stats_value_size() {
584 assert_eq!(ColumnStats::new(0, ScalarType::U32).value_size_bytes(), 4);
585 assert_eq!(ColumnStats::new(0, ScalarType::U64).value_size_bytes(), 8);
586 assert_eq!(ColumnStats::new(0, ScalarType::Bool).value_size_bytes(), 1);
587 }
588
589 #[test]
590 fn test_join_selectivity_new() {
591 let js = JoinSelectivity::new(RelId(1), RelId(2));
592 assert_eq!(js.left_rel, RelId(1));
593 assert_eq!(js.right_rel, RelId(2));
594 assert!(js.left_keys.is_empty());
595 assert!(js.right_keys.is_empty());
596 assert_eq!(js.selectivity, 1.0);
597 assert!(!js.is_pk_fk);
598 }
599
600 #[test]
601 fn test_join_selectivity_set_keys() {
602 let mut js = JoinSelectivity::new(RelId(1), RelId(2));
603 js.set_keys(vec![0, 1], vec![0, 1]);
604 assert_eq!(js.left_keys, vec![0, 1]);
605 assert_eq!(js.right_keys, vec![0, 1]);
606 }
607
608 #[test]
609 fn test_join_selectivity_set_selectivity() {
610 let mut js = JoinSelectivity::new(RelId(1), RelId(2));
611 js.set_selectivity(0.01);
612 assert!((js.selectivity - 0.01).abs() < 0.0001);
613
614 js.set_selectivity(2.0);
616 assert_eq!(js.selectivity, 1.0);
617
618 js.set_selectivity(-1.0);
619 assert_eq!(js.selectivity, 0.0);
620 }
621
622 #[test]
623 fn test_join_selectivity_estimate_output_rows() {
624 let mut js = JoinSelectivity::new(RelId(1), RelId(2));
625 js.set_selectivity(0.01);
626
627 let output = js.estimate_output_rows(1000, 500);
629 assert_eq!(output, 5000);
630
631 js.set_selectivity(0.0);
633 let output_min = js.estimate_output_rows(10, 10);
634 assert_eq!(output_min, 1);
635 }
636
637 #[test]
638 fn test_join_selectivity_pk_fk() {
639 let mut js = JoinSelectivity::new(RelId(1), RelId(2));
640 js.mark_pk_fk();
641 assert!(js.is_pk_fk);
642
643 let output = js.estimate_output_rows(100, 500);
645 assert_eq!(output, 500); }
647
648 #[test]
649 fn test_join_selectivity_estimate_from_stats() {
650 let sel = JoinSelectivity::estimate_selectivity_from_stats(100, 200);
652 assert!((sel - 0.005).abs() < 0.0001);
653
654 let sel_zero = JoinSelectivity::estimate_selectivity_from_stats(0, 100);
656 assert_eq!(sel_zero, 1.0);
657 }
658
659 #[test]
660 fn test_join_selectivity_update_from_observation() {
661 let mut js = JoinSelectivity::new(RelId(1), RelId(2));
662 js.update_from_observation(1000, 500, 2500);
663
664 assert!((js.selectivity - 0.005).abs() < 0.0001);
666 }
667
668 #[test]
669 fn test_all_scalar_types_column_stats() {
670 let types = [
672 ScalarType::U32,
673 ScalarType::U64,
674 ScalarType::I32,
675 ScalarType::I64,
676 ScalarType::F32,
677 ScalarType::F64,
678 ScalarType::Bool,
679 ScalarType::Symbol,
680 ];
681
682 for (idx, dtype) in types.iter().enumerate() {
683 let col = ColumnStats::new(idx, *dtype);
684 assert_eq!(col.col_idx, idx);
685 assert_eq!(col.dtype, *dtype);
686 assert!(col.value_size_bytes() > 0);
687 }
688 }
689}