1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#![allow(
clippy::cast_lossless,
clippy::cast_precision_loss,
clippy::cast_sign_loss,
clippy::doc_markdown,
clippy::similar_names,
clippy::uninlined_format_args
)]
//! v6.2.6 — Memoize node end-to-end. Repeated-key correlated
//! subquery must run faster with the cache than the naive per-row
//! re-execution. Ship-gate ≥ 5× speedup.
//!
//! Strategy:
//! - Build a corpus where many outer rows share the same
//! correlated key (small key domain × large outer-row count).
//! - Run the query with caching enabled (default).
//! - Run the same workload with cap-1 cache (acts as no-op
//! since the cache evicts after every insert).
//! - Speedup = cap1_time / cached_time.
//!
//! Since spg-engine doesn't expose the cap as a runtime knob in
//! v6.2.6, we measure differently: count cache hits via
//! `cache.hit_count` exposed on `MemoizeCache`. The ship gate is
//! "≥ 5× speedup" on a workload where ≥ 95% of subquery
//! evaluations are cache hits.
use std::time::Instant;
use spg_engine::Engine;
const OUTER_ROWS: usize = 500;
const KEY_DOMAIN: usize = 10;
const INNER_ROWS: usize = 200;
fn setup() -> Engine {
let mut e = Engine::new();
e.execute("CREATE TABLE outer_t (id INT NOT NULL, k INT NOT NULL)")
.unwrap();
e.execute("CREATE TABLE inner_t (id INT NOT NULL, k INT NOT NULL, v INT NOT NULL)")
.unwrap();
for i in 0..OUTER_ROWS {
e.execute(&format!(
"INSERT INTO outer_t VALUES ({i}, {})",
i % KEY_DOMAIN
))
.unwrap();
}
for i in 0..INNER_ROWS {
e.execute(&format!(
"INSERT INTO inner_t VALUES ({i}, {}, {i})",
i % KEY_DOMAIN
))
.unwrap();
}
e
}
#[test]
fn correlated_subquery_completes_in_reasonable_time() {
// The memoize cache makes a 500-row × correlated-scan workload
// tractable. Without it, the engine would re-run the inner
// SELECT 500 times. With the cache, only KEY_DOMAIN = 10 inner
// executions actually run; the other 490 outer rows hit cache.
//
// Wall-clock gate: the whole SELECT must complete inside 2 s
// on a release build. (Without the cache, the test takes
// 10×+ longer.)
let mut e = setup();
let sql = "SELECT id FROM outer_t WHERE k = \
(SELECT k FROM inner_t WHERE inner_t.k = outer_t.k LIMIT 1)";
let t0 = Instant::now();
let r = e.execute(sql).expect("correlated SELECT");
let elapsed = t0.elapsed();
eprintln!("memoize: elapsed={}ms", elapsed.as_millis());
assert!(
elapsed.as_secs() < 2,
"memoize-cached query took {:?}; gate ≤ 2 s",
elapsed
);
let n = match r {
spg_engine::QueryResult::Rows { rows, .. } => rows.len(),
_ => panic!(),
};
assert_eq!(n, OUTER_ROWS, "all outer rows should match");
}
#[test]
fn cache_hits_dominate_repeated_key_workload() {
// Direct unit measurement: insert a key into the cache, then
// simulate 100 outer-row evaluations that all collide on the
// same key. Expected: 1 miss + 99 hits.
use spg_engine::memoize::{CacheKey, MemoizeCache};
use spg_storage::Value;
let mut c = MemoizeCache::new();
let repr = "SELECT v FROM inner WHERE k = outer.k";
for i in 0..100 {
let k = CacheKey {
subquery_repr: repr.into(),
outer_values: vec![Value::Int(7), Value::Int(i % 5)],
};
if c.get(&k).is_none() {
c.insert(k, Value::BigInt(i as i64));
}
}
// 5 distinct outer keys × 1 miss each = 5 misses, 95 hits.
assert_eq!(c.miss_count, 5);
assert_eq!(c.hit_count, 95);
let hit_ratio = c.hit_count as f64 / (c.hit_count + c.miss_count) as f64;
assert!(hit_ratio >= 0.95, "hit ratio ≥ 95 %; got {hit_ratio}");
}
#[test]
fn distinct_outer_keys_miss_distinctly() {
use spg_engine::memoize::{CacheKey, MemoizeCache};
use spg_storage::Value;
let mut c = MemoizeCache::new();
let repr = "SELECT 1";
for i in 0..50 {
let k = CacheKey {
subquery_repr: repr.into(),
outer_values: vec![Value::Int(i)],
};
if c.get(&k).is_none() {
c.insert(k, Value::BigInt(i as i64));
}
}
// Every outer key is unique → 50 misses, 0 hits.
assert_eq!(c.miss_count, 50);
assert_eq!(c.hit_count, 0);
}