fsqlite_planner/
decision_contract.rs

1//! Decision Contract for the query planner (bd-1lsfu.6).
2//!
3//! Every planning decision is logged as a structured record with four fields:
4//! - **STATE**: table stats, indexes, WHERE terms observed
5//! - **ACTION**: join order, access paths, estimated costs chosen
6//! - **LOSS**: estimated cost (plan-time) and actual cost (post-execution)
7//! - **CALIBRATION**: actual/estimated ratio, miscalibration alerts
8//!
9//! Records form a BLAKE3-chained append-only log for tamper-evident auditing.
10
11use serde::{Deserialize, Serialize};
12use std::sync::atomic::{AtomicU64, Ordering};
13use std::time::{SystemTime, UNIX_EPOCH};
14
15use crate::{AccessPath, AccessPathKind, IndexInfo, QueryPlan, StatsSource, TableStats};
16
17// ---------------------------------------------------------------------------
18// ID generator
19// ---------------------------------------------------------------------------
20
/// Process-wide counter backing contract IDs. It starts at 1, so the first
/// ID handed out by [`next_id`] is 1.
static NEXT_CONTRACT_ID: AtomicU64 = AtomicU64::new(1);

/// Returns the next unused contract ID and advances the shared counter.
fn next_id() -> u64 {
    // `fetch_add` returns the value held *before* the increment.
    NEXT_CONTRACT_ID.fetch_add(1, Ordering::Relaxed)
}
26
27// ---------------------------------------------------------------------------
28// STATE: what the planner observed
29// ---------------------------------------------------------------------------
30
/// Summary of table statistics at decision time.
///
/// A serializable snapshot built from a [`TableStats`] through
/// `From<&TableStats>`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TableStatsSummary {
    /// Table name, copied from the source [`TableStats`].
    pub name: String,
    /// Page count copied from `TableStats::n_pages`.
    pub n_pages: u64,
    /// Row count copied from `TableStats::n_rows`.
    pub n_rows: u64,
    /// Origin of the statistics as a label: `"analyze"` or `"heuristic"`.
    pub source: String,
}
39
40impl From<&TableStats> for TableStatsSummary {
41    fn from(ts: &TableStats) -> Self {
42        Self {
43            name: ts.name.clone(),
44            n_pages: ts.n_pages,
45            n_rows: ts.n_rows,
46            source: match ts.source {
47                StatsSource::Analyze => "analyze".to_owned(),
48                StatsSource::Heuristic => "heuristic".to_owned(),
49            },
50        }
51    }
52}
53
/// Summary of index metadata at decision time.
///
/// A serializable snapshot built from an [`IndexInfo`] through
/// `From<&IndexInfo>`; only the fields listed here are carried over.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct IndexSummary {
    /// Index name, copied from the source [`IndexInfo`].
    pub name: String,
    /// Name of the table the index belongs to.
    pub table: String,
    /// Column names copied from `IndexInfo::columns`.
    pub columns: Vec<String>,
    /// Uniqueness flag copied from `IndexInfo::unique`.
    pub unique: bool,
    /// Page count copied from `IndexInfo::n_pages`.
    pub n_pages: u64,
}
63
64impl From<&IndexInfo> for IndexSummary {
65    fn from(ii: &IndexInfo) -> Self {
66        Self {
67            name: ii.name.clone(),
68            table: ii.table.clone(),
69            columns: ii.columns.clone(),
70            unique: ii.unique,
71            n_pages: ii.n_pages,
72        }
73    }
74}
75
/// What the planner observed when making a decision.
///
/// This is the STATE part of a [`DecisionContract`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct PlannerState {
    /// Table statistics available at plan time.
    pub tables: Vec<TableStatsSummary>,
    /// Indexes available at plan time.
    pub indexes: Vec<IndexSummary>,
    /// Number of WHERE terms analyzed.
    pub where_term_count: usize,
    /// Number of columns the query needs; `None` means all columns.
    pub needed_column_count: Option<usize>,
    /// Number of cross-join pairs constraining order.
    pub cross_join_pairs: usize,
}
90
91// ---------------------------------------------------------------------------
92// ACTION: what the planner chose
93// ---------------------------------------------------------------------------
94
/// Summary of a chosen access path.
///
/// Built from an [`AccessPath`] through `From<&AccessPath>`. Only `PartialEq`
/// is derived (not `Eq`) because the cost fields are `f64`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct AccessPathSummary {
    /// Name of the table this path reads.
    pub table: String,
    /// Access path kind as text, as produced by [`access_path_kind_label`].
    pub kind: String,
    /// Copied from `AccessPath::index`; `None` when the path names no index.
    pub index: Option<String>,
    /// Estimated cost copied from `AccessPath::estimated_cost`.
    pub estimated_cost: f64,
    /// Estimated row count copied from `AccessPath::estimated_rows`.
    pub estimated_rows: f64,
}
104
105impl From<&AccessPath> for AccessPathSummary {
106    fn from(ap: &AccessPath) -> Self {
107        Self {
108            table: ap.table.clone(),
109            kind: access_path_kind_label(&ap.kind),
110            index: ap.index.clone(),
111            estimated_cost: ap.estimated_cost,
112            estimated_rows: ap.estimated_rows,
113        }
114    }
115}
116
117/// Human-readable label for an access path kind.
118#[must_use]
119pub fn access_path_kind_label(kind: &AccessPathKind) -> String {
120    match kind {
121        AccessPathKind::FullTableScan => "full_table_scan".to_owned(),
122        AccessPathKind::IndexScanRange { selectivity } => {
123            format!("index_scan_range(sel={selectivity:.3})")
124        }
125        AccessPathKind::IndexScanEquality => "index_scan_equality".to_owned(),
126        AccessPathKind::CoveringIndexScan { selectivity } => {
127            format!("covering_index_scan(sel={selectivity:.3})")
128        }
129        AccessPathKind::RowidLookup => "rowid_lookup".to_owned(),
130    }
131}
132
/// What the planner chose.
///
/// This is the ACTION part of a [`DecisionContract`]. Only `PartialEq` is
/// derived (not `Eq`) because the cost fields are `f64`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct PlannerAction {
    /// Join order chosen, as a sequence of table names.
    pub join_order: Vec<String>,
    /// Access paths with estimated costs per table.
    pub access_paths: Vec<AccessPathSummary>,
    /// Total estimated cost in page reads.
    pub total_estimated_cost: f64,
    /// Beam width used during search.
    pub beam_width: usize,
    /// Whether star-query optimization was applied.
    pub star_query_detected: bool,
}
147
148// ---------------------------------------------------------------------------
149// LOSS: cost estimates and actuals
150// ---------------------------------------------------------------------------
151
/// Actual execution cost (filled post-execution by the caller).
///
/// Of these measurements, `DecisionContract::record_actual_cost` uses only
/// `page_reads` to compute the calibration; the rest are stored as-is.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ActualCost {
    /// Actual page reads during execution.
    pub page_reads: u64,
    /// Actual CPU time in microseconds.
    pub cpu_micros: u64,
    /// Actual rows returned.
    pub actual_rows: u64,
    /// Wall-clock execution time in microseconds.
    pub wall_time_micros: u64,
}
164
/// Cost estimates and (optional) actuals.
///
/// This is the LOSS part of a [`DecisionContract`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct PlannerLoss {
    /// Estimated cost at plan time (page reads).
    pub estimated_cost: f64,
    /// Estimated total rows (product of per-table estimates).
    pub estimated_rows: f64,
    /// Actual cost after execution. `None` until execution completes and the
    /// caller records it.
    pub actual_cost: Option<ActualCost>,
}
175
176// ---------------------------------------------------------------------------
177// CALIBRATION: how well the planner's estimate matched reality
178// ---------------------------------------------------------------------------
179
/// Miscalibration alert level.
///
/// Each variant carries the calibration ratio (`actual / estimated`) that
/// triggered it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
// `Eq` cannot be derived: the variants hold `f64` values.
#[allow(clippy::derive_partial_eq_without_eq)]
pub enum MiscalibrationAlert {
    /// Planner overestimated cost by more than the threshold
    /// (ratio below `MISCALIBRATION_LOW`).
    Overestimate { ratio: f64 },
    /// Planner underestimated cost by more than the threshold
    /// (ratio above `MISCALIBRATION_HIGH`).
    Underestimate { ratio: f64 },
}
189
/// Calibration assessment comparing estimated vs actual cost.
///
/// This is the CALIBRATION part of a [`DecisionContract`], produced by
/// [`compute_calibration`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Calibration {
    /// Calibration ratio: `actual_cost / estimated_cost`.
    /// A value of 1.0 means perfect calibration; values above 1.0 mean the
    /// planner underestimated, values below 1.0 mean it overestimated.
    pub ratio: f64,
    /// Whether this decision was miscalibrated.
    pub miscalibrated: bool,
    /// Alert if miscalibrated; `None` otherwise.
    pub alert: Option<MiscalibrationAlert>,
}
201
/// Upper miscalibration threshold: a calibration ratio strictly greater than
/// this value raises an `Underestimate` alert.
pub const MISCALIBRATION_HIGH: f64 = 5.0;
/// Lower miscalibration threshold (the reciprocal of `MISCALIBRATION_HIGH`):
/// a calibration ratio strictly less than this value raises an
/// `Overestimate` alert.
pub const MISCALIBRATION_LOW: f64 = 0.2;
206
207/// Compute calibration from estimated and actual page reads.
208///
209/// Returns `None` if estimated cost is zero (no meaningful ratio).
210#[must_use]
211pub fn compute_calibration(estimated_cost: f64, actual_page_reads: u64) -> Option<Calibration> {
212    if estimated_cost <= 0.0 {
213        return None;
214    }
215    let ratio = actual_page_reads as f64 / estimated_cost;
216    let (miscalibrated, alert) = if ratio > MISCALIBRATION_HIGH {
217        (true, Some(MiscalibrationAlert::Underestimate { ratio }))
218    } else if ratio < MISCALIBRATION_LOW {
219        (true, Some(MiscalibrationAlert::Overestimate { ratio }))
220    } else {
221        (false, None)
222    };
223    Some(Calibration {
224        ratio,
225        miscalibrated,
226        alert,
227    })
228}
229
230// ---------------------------------------------------------------------------
231// Decision Contract record
232// ---------------------------------------------------------------------------
233
/// A complete decision contract: one per query plan produced.
///
/// Created by [`build_contract`] at plan time; the post-execution fields
/// (`loss.actual_cost` and `calibration`) are filled in later by
/// `record_actual_cost`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DecisionContract {
    /// Monotonically increasing record ID.
    pub id: u64,
    /// SQL query text (may be truncated for very long queries).
    pub query_text: String,
    /// When the decision was made (seconds since UNIX epoch).
    pub timestamp_epoch_secs: u64,
    /// STATE: what the planner observed.
    pub state: PlannerState,
    /// ACTION: what the planner chose.
    pub action: PlannerAction,
    /// LOSS: estimated and actual costs.
    pub loss: PlannerLoss,
    /// CALIBRATION: computed after execution. `None` until actual cost is set.
    pub calibration: Option<Calibration>,
    /// BLAKE3 hash of the previous record (hex). [`GENESIS_HASH`] for the
    /// first record.
    pub prev_hash: String,
    /// BLAKE3 hash of this record (hex). Computed once when the contract is
    /// built; recording the actual cost afterwards does not change it.
    pub record_hash: String,
}
256
/// Maximum query text length stored in a contract, measured in bytes
/// (it is compared against `str::len`).
const MAX_QUERY_TEXT_LEN: usize = 4096;
/// Marker appended to query text that was shortened to fit
/// `MAX_QUERY_TEXT_LEN`.
const TRUNCATION_SUFFIX: &str = "...[truncated]";

/// The genesis hash used as `prev_hash` for the first record: an all-zero
/// string standing in for "no previous record".
pub const GENESIS_HASH: &str = "0000000000000000000000000000000000000000000000000000000000000000";
263
264/// Compute the BLAKE3 hash of a decision contract's content fields
265/// (everything except `record_hash` itself).
266fn compute_record_hash(contract: &DecisionContract) -> String {
267    let mut hasher = blake3::Hasher::new();
268    hasher.update(&contract.id.to_le_bytes());
269    hasher.update(contract.query_text.as_bytes());
270    hasher.update(&contract.timestamp_epoch_secs.to_le_bytes());
271    hasher.update(contract.prev_hash.as_bytes());
272    // Hash the action summary (deterministic via join order + costs).
273    for table in &contract.action.join_order {
274        hasher.update(table.as_bytes());
275    }
276    hasher.update(&contract.action.total_estimated_cost.to_le_bytes());
277    hasher.update(&contract.loss.estimated_cost.to_le_bytes());
278    hasher.update(&contract.loss.estimated_rows.to_le_bytes());
279    format!("{}", hasher.finalize())
280}
281
282fn truncate_query_text_for_contract(query_text: &str) -> String {
283    if query_text.len() <= MAX_QUERY_TEXT_LEN {
284        return query_text.to_owned();
285    }
286
287    if MAX_QUERY_TEXT_LEN <= TRUNCATION_SUFFIX.len() {
288        let mut end = MAX_QUERY_TEXT_LEN;
289        while end > 0 && !query_text.is_char_boundary(end) {
290            end -= 1;
291        }
292        return query_text[..end].to_owned();
293    }
294
295    let mut end = MAX_QUERY_TEXT_LEN - TRUNCATION_SUFFIX.len();
296    while end > 0 && !query_text.is_char_boundary(end) {
297        end -= 1;
298    }
299
300    let mut truncated = query_text[..end].to_owned();
301    truncated.push_str(TRUNCATION_SUFFIX);
302    truncated
303}
304
305// ---------------------------------------------------------------------------
306// Builder: create a DecisionContract from planner inputs/outputs
307// ---------------------------------------------------------------------------
308
309/// Build a `DecisionContract` from the planner's inputs and output.
310///
311/// The contract is created at plan time with `calibration = None`.
312/// Call [`DecisionContract::record_actual_cost`] after execution to fill in
313/// the calibration fields.
314#[must_use]
315#[allow(clippy::too_many_arguments)]
316pub fn build_contract(
317    query_text: &str,
318    tables: &[TableStats],
319    indexes: &[IndexInfo],
320    where_term_count: usize,
321    needed_column_count: Option<usize>,
322    cross_join_pairs: usize,
323    plan: &QueryPlan,
324    beam_width: usize,
325    star_query_detected: bool,
326    prev_hash: &str,
327) -> DecisionContract {
328    let text = truncate_query_text_for_contract(query_text);
329
330    let estimated_rows: f64 = plan
331        .access_paths
332        .iter()
333        .map(|ap| ap.estimated_rows)
334        .product();
335
336    let mut contract = DecisionContract {
337        id: next_id(),
338        query_text: text,
339        timestamp_epoch_secs: SystemTime::now()
340            .duration_since(UNIX_EPOCH)
341            .unwrap_or_default()
342            .as_secs(),
343        state: PlannerState {
344            tables: tables.iter().map(TableStatsSummary::from).collect(),
345            indexes: indexes.iter().map(IndexSummary::from).collect(),
346            where_term_count,
347            needed_column_count,
348            cross_join_pairs,
349        },
350        action: PlannerAction {
351            join_order: plan.join_order.clone(),
352            access_paths: plan
353                .access_paths
354                .iter()
355                .map(AccessPathSummary::from)
356                .collect(),
357            total_estimated_cost: plan.total_cost,
358            beam_width,
359            star_query_detected,
360        },
361        loss: PlannerLoss {
362            estimated_cost: plan.total_cost,
363            estimated_rows,
364            actual_cost: None,
365        },
366        calibration: None,
367        prev_hash: prev_hash.to_owned(),
368        record_hash: String::new(),
369    };
370    contract.record_hash = compute_record_hash(&contract);
371    contract
372}
373
374impl DecisionContract {
375    /// Record actual execution cost and compute calibration.
376    pub fn record_actual_cost(&mut self, actual: ActualCost) {
377        self.calibration = compute_calibration(self.loss.estimated_cost, actual.page_reads);
378        self.loss.actual_cost = Some(actual);
379        // Recompute hash after filling actual cost (chain integrity
380        // depends on the hash at creation time, so we keep record_hash
381        // stable — calibration is an addendum).
382    }
383
384    /// Whether calibration indicates miscalibration.
385    #[must_use]
386    pub fn is_miscalibrated(&self) -> bool {
387        self.calibration.as_ref().is_some_and(|c| c.miscalibrated)
388    }
389}
390
391// ---------------------------------------------------------------------------
392// Decision Log: append-only, BLAKE3-chained
393// ---------------------------------------------------------------------------
394
395/// Append-only decision log with BLAKE3 chain integrity.
396///
397/// Each record's `prev_hash` points to the preceding record's `record_hash`,
398/// forming a tamper-evident chain. The log can be serialized to JSON for
399/// offline auditing.
400#[derive(Debug, Default)]
401pub struct DecisionLog {
402    decisions: Vec<DecisionContract>,
403    last_hash: String,
404}
405
406impl DecisionLog {
407    /// Create a new empty decision log.
408    #[must_use]
409    pub fn new() -> Self {
410        Self {
411            decisions: Vec::new(),
412            last_hash: GENESIS_HASH.to_owned(),
413        }
414    }
415
416    /// Append a plan decision to the log.
417    ///
418    /// Automatically chains the BLAKE3 hashes.
419    #[allow(clippy::too_many_arguments)]
420    pub fn record_plan(
421        &mut self,
422        query_text: &str,
423        tables: &[TableStats],
424        indexes: &[IndexInfo],
425        where_term_count: usize,
426        needed_column_count: Option<usize>,
427        cross_join_pairs: usize,
428        plan: &QueryPlan,
429        beam_width: usize,
430        star_query_detected: bool,
431    ) -> u64 {
432        let contract = build_contract(
433            query_text,
434            tables,
435            indexes,
436            where_term_count,
437            needed_column_count,
438            cross_join_pairs,
439            plan,
440            beam_width,
441            star_query_detected,
442            &self.last_hash,
443        );
444        let id = contract.id;
445        self.last_hash.clone_from(&contract.record_hash);
446        tracing::debug!(
447            contract_id = id,
448            query = %contract.query_text,
449            estimated_cost = contract.loss.estimated_cost,
450            join_order = ?contract.action.join_order,
451            "decision_contract.recorded"
452        );
453        self.decisions.push(contract);
454        id
455    }
456
457    /// Record actual execution cost for a previously logged decision.
458    ///
459    /// Returns `true` if the contract was found and updated.
460    pub fn record_actual(&mut self, contract_id: u64, actual: ActualCost) -> bool {
461        if let Some(contract) = self.decisions.iter_mut().find(|c| c.id == contract_id) {
462            contract.record_actual_cost(actual);
463            if let Some(ref cal) = contract.calibration {
464                tracing::debug!(
465                    contract_id,
466                    calibration_ratio = cal.ratio,
467                    miscalibrated = cal.miscalibrated,
468                    "decision_contract.calibrated"
469                );
470                if cal.miscalibrated {
471                    tracing::warn!(
472                        contract_id,
473                        calibration_ratio = cal.ratio,
474                        "decision_contract.miscalibration_alert"
475                    );
476                }
477            }
478            true
479        } else {
480            false
481        }
482    }
483
484    /// Number of recorded decisions.
485    #[must_use]
486    pub fn len(&self) -> usize {
487        self.decisions.len()
488    }
489
490    /// Whether the log is empty.
491    #[must_use]
492    pub fn is_empty(&self) -> bool {
493        self.decisions.is_empty()
494    }
495
496    /// Iterate over all decisions.
497    pub fn iter(&self) -> impl Iterator<Item = &DecisionContract> {
498        self.decisions.iter()
499    }
500
501    /// Get a decision by ID.
502    #[must_use]
503    pub fn get(&self, contract_id: u64) -> Option<&DecisionContract> {
504        self.decisions.iter().find(|c| c.id == contract_id)
505    }
506
507    /// Hash of the most recent record (chain tip).
508    #[must_use]
509    pub fn chain_tip_hash(&self) -> &str {
510        &self.last_hash
511    }
512
513    /// Verify BLAKE3 chain integrity: each record's hash matches its content
514    /// and its `prev_hash` matches the preceding record's `record_hash`.
515    #[must_use]
516    pub fn verify_chain_integrity(&self) -> bool {
517        let mut expected_prev = GENESIS_HASH.to_owned();
518        for contract in &self.decisions {
519            if contract.prev_hash != expected_prev {
520                return false;
521            }
522            let computed = compute_record_hash(contract);
523            if contract.record_hash != computed {
524                return false;
525            }
526            expected_prev.clone_from(&contract.record_hash);
527        }
528        true
529    }
530
531    /// Return decisions with calibration data, filtered by miscalibration.
532    #[must_use]
533    pub fn miscalibrated_decisions(&self) -> Vec<&DecisionContract> {
534        self.decisions
535            .iter()
536            .filter(|c| c.is_miscalibrated())
537            .collect()
538    }
539
540    /// Compute aggregate calibration statistics.
541    #[must_use]
542    pub fn calibration_stats(&self) -> CalibrationStats {
543        let calibrated: Vec<f64> = self
544            .decisions
545            .iter()
546            .filter_map(|c| c.calibration.as_ref().map(|cal| cal.ratio))
547            .collect();
548
549        if calibrated.is_empty() {
550            return CalibrationStats::default();
551        }
552
553        let n = calibrated.len();
554        let mean = calibrated.iter().sum::<f64>() / n as f64;
555        let variance = calibrated.iter().map(|r| (r - mean).powi(2)).sum::<f64>() / n as f64;
556        let stddev = variance.sqrt();
557
558        let mut sorted = calibrated;
559        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
560
561        let median = if n % 2 == 0 {
562            f64::midpoint(sorted[n / 2 - 1], sorted[n / 2])
563        } else {
564            sorted[n / 2]
565        };
566
567        let miscalibrated_count = self.miscalibrated_decisions().len();
568
569        CalibrationStats {
570            total_decisions: self.decisions.len(),
571            calibrated_decisions: n,
572            miscalibrated_count,
573            mean_ratio: mean,
574            median_ratio: median,
575            stddev_ratio: stddev,
576            min_ratio: sorted[0],
577            max_ratio: sorted[n - 1],
578        }
579    }
580
581    /// Serialize the entire log to JSON.
582    ///
583    /// # Errors
584    /// Returns error if serialization fails.
585    #[cfg(feature = "decision-log-json")]
586    pub fn to_json(&self) -> Result<String, serde_json::Error> {
587        serde_json::to_string_pretty(&self.decisions)
588    }
589
590    /// Query decisions by time range (epoch seconds, inclusive).
591    #[must_use]
592    pub fn query_by_time_range(&self, start: u64, end: u64) -> Vec<&DecisionContract> {
593        self.decisions
594            .iter()
595            .filter(|c| c.timestamp_epoch_secs >= start && c.timestamp_epoch_secs <= end)
596            .collect()
597    }
598}
599
600// ---------------------------------------------------------------------------
601// Aggregate calibration statistics
602// ---------------------------------------------------------------------------
603
/// Aggregate statistics over calibrated decisions.
///
/// Produced by `DecisionLog::calibration_stats`. The derived `Default` sets
/// every field to zero.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CalibrationStats {
    /// Total decisions in the log (calibrated + uncalibrated).
    pub total_decisions: usize,
    /// Decisions with actual cost recorded.
    pub calibrated_decisions: usize,
    /// Decisions exceeding miscalibration thresholds.
    pub miscalibrated_count: usize,
    /// Mean calibration ratio.
    pub mean_ratio: f64,
    /// Median calibration ratio.
    pub median_ratio: f64,
    /// Standard deviation of calibration ratios.
    pub stddev_ratio: f64,
    /// Minimum calibration ratio.
    pub min_ratio: f64,
    /// Maximum calibration ratio.
    pub max_ratio: f64,
}
624
625impl CalibrationStats {
626    /// The fraction of calibrated decisions that are miscalibrated.
627    #[must_use]
628    pub fn miscalibration_rate(&self) -> f64 {
629        if self.calibrated_decisions == 0 {
630            return 0.0;
631        }
632        self.miscalibrated_count as f64 / self.calibrated_decisions as f64
633    }
634
635    /// Whether the planner is well-calibrated overall.
636    ///
637    /// Well-calibrated means: median ratio between 0.5 and 2.0,
638    /// and miscalibration rate below 10%.
639    #[must_use]
640    pub fn is_well_calibrated(&self) -> bool {
641        if self.calibrated_decisions == 0 {
642            return true; // No data = no evidence of miscalibration.
643        }
644        (0.5..=2.0).contains(&self.median_ratio) && self.miscalibration_rate() < 0.10
645    }
646}
647
648impl fmt::Display for CalibrationStats {
649    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
650        write!(
651            f,
652            "CalibrationStats {{ decisions: {}/{} calibrated, miscalibrated: {} ({:.1}%), \
653             mean: {:.3}, median: {:.3}, stddev: {:.3}, range: [{:.3}, {:.3}] }}",
654            self.calibrated_decisions,
655            self.total_decisions,
656            self.miscalibrated_count,
657            self.miscalibration_rate() * 100.0,
658            self.mean_ratio,
659            self.median_ratio,
660            self.stddev_ratio,
661            self.min_ratio,
662            self.max_ratio,
663        )
664    }
665}
666
667use std::fmt;
668
669// ---------------------------------------------------------------------------
670// Tests
671// ---------------------------------------------------------------------------
672
673#[cfg(test)]
674mod tests {
675    use super::*;
676
677    fn sample_tables() -> Vec<TableStats> {
678        vec![
679            TableStats {
680                name: "users".to_owned(),
681                n_pages: 100,
682                n_rows: 10_000,
683                source: StatsSource::Analyze,
684            },
685            TableStats {
686                name: "orders".to_owned(),
687                n_pages: 500,
688                n_rows: 100_000,
689                source: StatsSource::Heuristic,
690            },
691        ]
692    }
693
694    fn sample_indexes() -> Vec<IndexInfo> {
695        vec![IndexInfo {
696            name: "idx_orders_user_id".to_owned(),
697            table: "orders".to_owned(),
698            columns: vec!["user_id".to_owned()],
699            unique: false,
700            n_pages: 50,
701            source: StatsSource::Analyze,
702            partial_where: None,
703            expression_columns: vec![],
704        }]
705    }
706
707    fn sample_plan() -> QueryPlan {
708        QueryPlan {
709            join_order: vec!["users".to_owned(), "orders".to_owned()],
710            access_paths: vec![
711                AccessPath {
712                    table: "users".to_owned(),
713                    kind: AccessPathKind::FullTableScan,
714                    index: None,
715                    estimated_cost: 100.0,
716                    estimated_rows: 10_000.0,
717                    time_travel: None,
718                    probe: None,
719                },
720                AccessPath {
721                    table: "orders".to_owned(),
722                    kind: AccessPathKind::IndexScanEquality,
723                    index: Some("idx_orders_user_id".to_owned()),
724                    estimated_cost: 15.0,
725                    estimated_rows: 10.0,
726                    time_travel: None,
727                    probe: None,
728                },
729            ],
730            join_segments: vec![crate::JoinPlanSegment {
731                relations: vec!["users".to_owned(), "orders".to_owned()],
732                operator: crate::JoinOperator::HashJoin,
733                estimated_cost: 115.0,
734                reason: "2-way joins stay on pairwise hash join".to_owned(),
735            }],
736            total_cost: 115.0,
737            morsel_eligibility: None,
738        }
739    }
740
    /// A freshly built contract mirrors the planner inputs (STATE), the plan
    /// (ACTION) and the estimate (LOSS), has no actuals or calibration yet,
    /// and carries a non-empty hash chained onto the given `prev_hash`.
    #[test]
    fn build_contract_captures_state_action_loss() {
        let tables = sample_tables();
        let indexes = sample_indexes();
        let plan = sample_plan();

        let contract = build_contract(
            "SELECT * FROM users JOIN orders ON users.id = orders.user_id",
            &tables,
            &indexes,
            3,
            None,
            0,
            &plan,
            5,
            false,
            GENESIS_HASH,
        );

        // STATE and ACTION are copied from the fixtures.
        assert_eq!(contract.state.tables.len(), 2);
        assert_eq!(contract.state.indexes.len(), 1);
        assert_eq!(contract.state.where_term_count, 3);
        assert_eq!(contract.action.join_order, vec!["users", "orders"]);
        assert_eq!(contract.action.access_paths.len(), 2);
        // LOSS holds the plan's total cost; post-execution fields are unset.
        assert!((contract.loss.estimated_cost - 115.0).abs() < f64::EPSILON);
        assert!(contract.loss.actual_cost.is_none());
        assert!(contract.calibration.is_none());
        // The record is hashed and linked to the supplied previous hash.
        assert_ne!(contract.record_hash, "");
        assert_eq!(contract.prev_hash, GENESIS_HASH);
    }
771
    /// Recording an actual cost close to the estimate yields a calibration
    /// ratio near 1 that is not flagged as miscalibrated.
    #[test]
    fn record_actual_cost_computes_calibration() {
        let tables = sample_tables();
        let indexes = sample_indexes();
        let plan = sample_plan();

        let mut contract = build_contract(
            "SELECT 1",
            &tables,
            &indexes,
            0,
            None,
            0,
            &plan,
            1,
            false,
            GENESIS_HASH,
        );

        // Estimated cost = 115.0, actual page reads = 120.
        contract.record_actual_cost(ActualCost {
            page_reads: 120,
            cpu_micros: 500,
            actual_rows: 50,
            wall_time_micros: 1000,
        });

        // Ratio is actual / estimated, well inside the calibrated band.
        let cal = contract.calibration.as_ref().unwrap();
        assert!((cal.ratio - 120.0 / 115.0).abs() < 0.01);
        assert!(!cal.miscalibrated);
        assert!(!contract.is_miscalibrated());
    }
804
805    #[test]
806    fn miscalibration_alert_underestimate() {
807        // Estimated 10 page reads, actual 100 → ratio 10.0 > 5.0
808        let cal = compute_calibration(10.0, 100).unwrap();
809        assert!(cal.miscalibrated);
810        assert!(matches!(
811            cal.alert,
812            Some(MiscalibrationAlert::Underestimate { .. })
813        ));
814    }
815
816    #[test]
817    fn miscalibration_alert_overestimate() {
818        // Estimated 1000 page reads, actual 10 → ratio 0.01 < 0.2
819        let cal = compute_calibration(1000.0, 10).unwrap();
820        assert!(cal.miscalibrated);
821        assert!(matches!(
822            cal.alert,
823            Some(MiscalibrationAlert::Overestimate { .. })
824        ));
825    }
826
827    #[test]
828    fn calibration_none_for_zero_estimate() {
829        assert!(compute_calibration(0.0, 100).is_none());
830    }
831
    /// Boundary behavior of `compute_calibration`: the well-calibrated band,
    /// the strict high threshold, alerts on either side, and rejection of a
    /// negative estimate.
    #[test]
    fn test_compute_calibration_boundaries_and_well_calibrated() {
        // Existing tests cover clearly under/over/zero estimates; this covers
        // the well-calibrated band, the exact (strict) high threshold, and a
        // non-positive estimate.

        // Perfect calibration: ratio 1.0 -> not miscalibrated, no alert.
        let perfect = compute_calibration(50.0, 50).unwrap();
        assert!((perfect.ratio - 1.0).abs() < 1e-9);
        assert!(!perfect.miscalibrated);
        assert!(perfect.alert.is_none());

        // Inside the (0.2, 5.0) band: well-calibrated.
        let mid = compute_calibration(100.0, 50).unwrap(); // ratio 0.5
        assert!(!mid.miscalibrated && mid.alert.is_none());

        // The high threshold is checked with strict `> 5.0`, so a ratio of
        // exactly 5.0 stays well-calibrated.
        let at_high = compute_calibration(20.0, 100).unwrap(); // ratio 5.0
        assert!((at_high.ratio - 5.0).abs() < 1e-9);
        assert!(
            !at_high.miscalibrated,
            "ratio == 5.0 should be well-calibrated under a strict > check"
        );

        // Just past the high threshold -> Underestimate alert.
        let over_high = compute_calibration(20.0, 101).unwrap(); // ratio 5.05
        assert!(over_high.miscalibrated);
        assert!(matches!(
            over_high.alert,
            Some(MiscalibrationAlert::Underestimate { .. })
        ));

        // Clearly below the low threshold -> Overestimate alert.
        let under_low = compute_calibration(100.0, 10).unwrap(); // ratio 0.1
        assert!(under_low.miscalibrated);
        assert!(matches!(
            under_low.alert,
            Some(MiscalibrationAlert::Overestimate { .. })
        ));

        // A non-positive estimate yields no meaningful ratio.
        assert!(
            compute_calibration(-5.0, 100).is_none(),
            "negative estimate -> None"
        );
    }
879
880    #[test]
881    fn decision_log_chain_integrity() {
882        let tables = sample_tables();
883        let indexes = sample_indexes();
884        let plan = sample_plan();
885
886        let mut log = DecisionLog::new();
887        log.record_plan("SELECT 1", &tables, &indexes, 0, None, 0, &plan, 1, false);
888        log.record_plan("SELECT 2", &tables, &indexes, 1, None, 0, &plan, 1, false);
889        log.record_plan("SELECT 3", &tables, &indexes, 2, None, 0, &plan, 1, false);
890
891        assert_eq!(log.len(), 3);
892        assert!(log.verify_chain_integrity());
893
894        // Verify chain linkage.
895        assert_eq!(log.decisions[0].prev_hash, GENESIS_HASH);
896        assert_eq!(log.decisions[1].prev_hash, log.decisions[0].record_hash);
897        assert_eq!(log.decisions[2].prev_hash, log.decisions[1].record_hash);
898    }
899
900    #[test]
901    fn chain_tip_hash_tracks_the_latest_record() {
902        // The empty-log genesis case is covered elsewhere; this pins that the
903        // chain tip advances to the most recent record's hash as plans are
904        // logged, and that each record chains onto the previous tip.
905        let tables = sample_tables();
906        let indexes = sample_indexes();
907        let plan = sample_plan();
908        let mut log = DecisionLog::new();
909        assert_eq!(log.chain_tip_hash(), GENESIS_HASH);
910
911        log.record_plan("SELECT 1", &tables, &indexes, 0, None, 0, &plan, 1, false);
912        let tip_after_first = log.chain_tip_hash().to_owned();
913        // The tip is no longer genesis and equals the first record's hash.
914        assert_ne!(tip_after_first, GENESIS_HASH);
915        assert_eq!(tip_after_first, log.decisions[0].record_hash);
916
917        log.record_plan("SELECT 2", &tables, &indexes, 0, None, 0, &plan, 1, false);
918        // The tip advances to the second record, which chains onto the first
919        // (its prev_hash is the previous tip).
920        assert_eq!(log.chain_tip_hash(), log.decisions[1].record_hash);
921        assert_eq!(log.decisions[1].prev_hash, tip_after_first);
922        assert_ne!(log.chain_tip_hash(), tip_after_first);
923    }
924
925    #[test]
926    fn decision_log_tamper_detection() {
927        let tables = sample_tables();
928        let indexes = sample_indexes();
929        let plan = sample_plan();
930
931        let mut log = DecisionLog::new();
932        log.record_plan("SELECT 1", &tables, &indexes, 0, None, 0, &plan, 1, false);
933        log.record_plan("SELECT 2", &tables, &indexes, 0, None, 0, &plan, 1, false);
934
935        assert!(log.verify_chain_integrity());
936
937        // Tamper with first record.
938        log.decisions[0].query_text = "TAMPERED".to_owned();
939        assert!(!log.verify_chain_integrity());
940    }
941
942    #[test]
943    fn decision_log_record_actual_and_stats() {
944        let tables = sample_tables();
945        let indexes = sample_indexes();
946        let plan = sample_plan();
947
948        let mut log = DecisionLog::new();
949        let id1 = log.record_plan("Q1", &tables, &indexes, 0, None, 0, &plan, 1, false);
950        let id2 = log.record_plan("Q2", &tables, &indexes, 0, None, 0, &plan, 1, false);
951
952        // Good calibration for Q1.
953        assert!(log.record_actual(
954            id1,
955            ActualCost {
956                page_reads: 110,
957                cpu_micros: 200,
958                actual_rows: 100,
959                wall_time_micros: 500,
960            }
961        ));
962
963        // Bad calibration for Q2 (massive underestimate).
964        assert!(log.record_actual(
965            id2,
966            ActualCost {
967                page_reads: 10_000,
968                cpu_micros: 5000,
969                actual_rows: 50_000,
970                wall_time_micros: 10_000,
971            }
972        ));
973
974        let stats = log.calibration_stats();
975        assert_eq!(stats.calibrated_decisions, 2);
976        assert_eq!(stats.miscalibrated_count, 1);
977        assert!(!stats.is_well_calibrated());
978
979        let misc = log.miscalibrated_decisions();
980        assert_eq!(misc.len(), 1);
981        assert_eq!(misc[0].query_text, "Q2");
982    }
983
984    #[test]
985    fn calibration_stats_handle_empty_and_uncalibrated_without_nan() {
986        // Empty log: the is_empty guard must return zeroed stats, never a
987        // div-by-zero NaN from the mean/median/stddev computation.
988        let empty = DecisionLog::new();
989        let s = empty.calibration_stats();
990        assert_eq!(s.calibrated_decisions, 0);
991        assert_eq!(s.miscalibrated_count, 0);
992        assert!(
993            !s.mean_ratio.is_nan() && !s.median_ratio.is_nan() && !s.stddev_ratio.is_nan(),
994            "empty log must not produce NaN ratios"
995        );
996        assert!(s.mean_ratio.abs() < f64::EPSILON);
997
998        // Decisions recorded but never given an actual cost contribute no
999        // calibrated samples, so the aggregate stays zeroed (still no NaN).
1000        let tables = sample_tables();
1001        let indexes = sample_indexes();
1002        let plan = sample_plan();
1003        let mut log = DecisionLog::new();
1004        for i in 0..3 {
1005            let _ = log.record_plan(
1006                &format!("Q{i}"),
1007                &tables,
1008                &indexes,
1009                0,
1010                None,
1011                0,
1012                &plan,
1013                1,
1014                false,
1015            );
1016        }
1017        let s = log.calibration_stats();
1018        assert_eq!(
1019            s.calibrated_decisions, 0,
1020            "no actuals recorded -> nothing calibrated"
1021        );
1022        assert!(
1023            !s.mean_ratio.is_nan(),
1024            "uncalibrated log must not produce NaN"
1025        );
1026    }
1027
1028    #[test]
1029    fn calibration_stats_well_calibrated() {
1030        let tables = sample_tables();
1031        let indexes = sample_indexes();
1032        let plan = sample_plan();
1033
1034        let mut log = DecisionLog::new();
1035        // Record 10 well-calibrated decisions.
1036        for i in 0..10 {
1037            let id = log.record_plan(
1038                &format!("Q{i}"),
1039                &tables,
1040                &indexes,
1041                0,
1042                None,
1043                0,
1044                &plan,
1045                1,
1046                false,
1047            );
1048            log.record_actual(
1049                id,
1050                ActualCost {
1051                    page_reads: 115 + i * 2, // Close to estimated 115.
1052                    cpu_micros: 100,
1053                    actual_rows: 50,
1054                    wall_time_micros: 200,
1055                },
1056            );
1057        }
1058
1059        let stats = log.calibration_stats();
1060        assert_eq!(stats.calibrated_decisions, 10);
1061        assert_eq!(stats.miscalibrated_count, 0);
1062        assert!(stats.is_well_calibrated());
1063        assert!((stats.median_ratio - 1.0).abs() < 0.5);
1064    }
1065
1066    #[cfg(feature = "decision-log-json")]
1067    #[test]
1068    fn decision_log_to_json() {
1069        let tables = sample_tables();
1070        let indexes = sample_indexes();
1071        let plan = sample_plan();
1072
1073        let mut log = DecisionLog::new();
1074        log.record_plan("SELECT 1", &tables, &indexes, 0, None, 0, &plan, 1, false);
1075
1076        let json = log.to_json().unwrap();
1077        assert!(json.contains("\"query_text\": \"SELECT 1\""));
1078        assert!(json.contains("\"estimated_cost\""));
1079        assert!(json.contains("\"record_hash\""));
1080
1081        // Verify it deserializes back.
1082        let parsed: Vec<DecisionContract> = serde_json::from_str(&json).unwrap();
1083        assert_eq!(parsed.len(), 1);
1084        assert_eq!(parsed[0].query_text, "SELECT 1");
1085    }
1086
1087    #[test]
1088    fn query_text_truncation() {
1089        let tables = sample_tables();
1090        let indexes = sample_indexes();
1091        let plan = sample_plan();
1092
1093        let long_query = "SELECT ".to_owned() + &"x".repeat(5000);
1094        let contract = build_contract(
1095            &long_query,
1096            &tables,
1097            &indexes,
1098            0,
1099            None,
1100            0,
1101            &plan,
1102            1,
1103            false,
1104            GENESIS_HASH,
1105        );
1106        assert_eq!(contract.query_text.len(), MAX_QUERY_TEXT_LEN);
1107        assert!(contract.query_text.ends_with(TRUNCATION_SUFFIX));
1108    }
1109
1110    #[test]
1111    fn query_text_truncation_preserves_utf8_boundaries() {
1112        let tables = sample_tables();
1113        let indexes = sample_indexes();
1114        let plan = sample_plan();
1115
1116        let long_query = format!("SELECT '{}'", "é".repeat(3000));
1117        let contract = build_contract(
1118            &long_query,
1119            &tables,
1120            &indexes,
1121            0,
1122            None,
1123            0,
1124            &plan,
1125            1,
1126            false,
1127            GENESIS_HASH,
1128        );
1129
1130        assert!(
1131            std::str::from_utf8(contract.query_text.as_bytes()).is_ok(),
1132            "truncated query text must remain valid UTF-8"
1133        );
1134        assert_eq!(contract.query_text.len(), MAX_QUERY_TEXT_LEN);
1135        assert!(contract.query_text.ends_with(TRUNCATION_SUFFIX));
1136    }
1137
1138    #[test]
1139    fn access_path_kind_labels() {
1140        assert_eq!(
1141            access_path_kind_label(&AccessPathKind::FullTableScan),
1142            "full_table_scan"
1143        );
1144        assert_eq!(
1145            access_path_kind_label(&AccessPathKind::IndexScanEquality),
1146            "index_scan_equality"
1147        );
1148        assert_eq!(
1149            access_path_kind_label(&AccessPathKind::RowidLookup),
1150            "rowid_lookup"
1151        );
1152        assert!(
1153            access_path_kind_label(&AccessPathKind::IndexScanRange { selectivity: 0.33 })
1154                .starts_with("index_scan_range")
1155        );
1156        assert!(
1157            access_path_kind_label(&AccessPathKind::CoveringIndexScan { selectivity: 0.5 })
1158                .starts_with("covering_index_scan")
1159        );
1160    }
1161
1162    #[test]
1163    fn access_path_kind_label_formats_selectivity_to_three_decimals() {
1164        // access_path_kind_labels only checks the prefix for the parameterized
1165        // variants; this pins the exact (sel={:.3}) formatting: three decimal
1166        // places with trailing-zero padding, for both Range and Covering scans.
1167        assert_eq!(
1168            access_path_kind_label(&AccessPathKind::IndexScanRange { selectivity: 0.5 }),
1169            "index_scan_range(sel=0.500)"
1170        );
1171        assert_eq!(
1172            access_path_kind_label(&AccessPathKind::CoveringIndexScan { selectivity: 0.25 }),
1173            "covering_index_scan(sel=0.250)"
1174        );
1175        // A value with more than three decimals is rounded to three.
1176        assert_eq!(
1177            access_path_kind_label(&AccessPathKind::IndexScanRange {
1178                selectivity: 0.12345
1179            }),
1180            "index_scan_range(sel=0.123)"
1181        );
1182    }
1183
1184    #[test]
1185    fn table_stats_summary_from() {
1186        let ts = TableStats {
1187            name: "foo".to_owned(),
1188            n_pages: 42,
1189            n_rows: 1000,
1190            source: StatsSource::Analyze,
1191        };
1192        let summary = TableStatsSummary::from(&ts);
1193        assert_eq!(summary.name, "foo");
1194        assert_eq!(summary.n_pages, 42);
1195        assert_eq!(summary.source, "analyze");
1196    }
1197
1198    #[test]
1199    fn query_by_time_range_filters_inclusively() {
1200        // query_by_time_range had no direct test. Its window is inclusive at
1201        // both ends. record_plan stamps the system clock, so overwrite the
1202        // recorded timestamps to fixed values to make the window deterministic.
1203        let tables = sample_tables();
1204        let indexes = sample_indexes();
1205        let plan = sample_plan();
1206        let mut log = DecisionLog::new();
1207        for _ in 0..3 {
1208            log.record_plan("SELECT 1", &tables, &indexes, 0, None, 0, &plan, 1, false);
1209        }
1210        log.decisions[0].timestamp_epoch_secs = 100;
1211        log.decisions[1].timestamp_epoch_secs = 200;
1212        log.decisions[2].timestamp_epoch_secs = 300;
1213
1214        // A window covering only the middle stamp returns exactly that decision.
1215        let mid = log.query_by_time_range(150, 250);
1216        assert_eq!(mid.len(), 1);
1217        assert_eq!(mid[0].timestamp_epoch_secs, 200);
1218
1219        // Inclusive upper/lower bounds: [200, 300] includes both 200 and 300.
1220        assert_eq!(log.query_by_time_range(200, 300).len(), 2);
1221
1222        // The full span returns all three; a disjoint window returns none.
1223        assert_eq!(log.query_by_time_range(100, 300).len(), 3);
1224        assert!(log.query_by_time_range(400, 500).is_empty());
1225
1226        // A zero-width window on an exact stamp still matches (inclusive).
1227        assert_eq!(log.query_by_time_range(200, 200).len(), 1);
1228    }
1229
1230    #[test]
1231    fn decision_log_get_and_query() {
1232        let tables = sample_tables();
1233        let indexes = sample_indexes();
1234        let plan = sample_plan();
1235
1236        let mut log = DecisionLog::new();
1237        let id = log.record_plan("SELECT 1", &tables, &indexes, 0, None, 0, &plan, 1, false);
1238
1239        assert!(log.get(id).is_some());
1240        assert!(log.get(999_999).is_none());
1241        assert!(!log.is_empty());
1242    }
1243
1244    #[test]
1245    fn record_actual_returns_false_for_unknown_contract_id() {
1246        let tables = sample_tables();
1247        let indexes = sample_indexes();
1248        let plan = sample_plan();
1249        let mut log = DecisionLog::new();
1250        let id = log.record_plan("SELECT 1", &tables, &indexes, 0, None, 0, &plan, 1, false);
1251
1252        // A bogus contract id is a safe no-op that reports failure: no panic, and
1253        // the real decision stays uncalibrated.
1254        assert!(!log.record_actual(
1255            id + 1_000_000,
1256            ActualCost {
1257                page_reads: 50,
1258                cpu_micros: 10,
1259                actual_rows: 5,
1260                wall_time_micros: 20,
1261            }
1262        ));
1263        assert_eq!(log.calibration_stats().calibrated_decisions, 0);
1264
1265        // Recording against the real id succeeds and calibrates that decision.
1266        assert!(log.record_actual(
1267            id,
1268            ActualCost {
1269                page_reads: 50,
1270                cpu_micros: 10,
1271                actual_rows: 5,
1272                wall_time_micros: 20,
1273            }
1274        ));
1275        assert_eq!(log.calibration_stats().calibrated_decisions, 1);
1276    }
1277
1278    #[test]
1279    fn empty_log_stats() {
1280        let log = DecisionLog::new();
1281        let stats = log.calibration_stats();
1282        assert_eq!(stats.total_decisions, 0);
1283        assert_eq!(stats.calibrated_decisions, 0);
1284        assert!(stats.is_well_calibrated());
1285        assert_eq!(log.chain_tip_hash(), GENESIS_HASH);
1286    }
1287
1288    #[test]
1289    fn calibration_stats_rate_and_well_calibrated_boundaries() {
1290        let stats = |cal: usize, misc: usize, median: f64| CalibrationStats {
1291            total_decisions: cal,
1292            calibrated_decisions: cal,
1293            miscalibrated_count: misc,
1294            mean_ratio: median,
1295            median_ratio: median,
1296            stddev_ratio: 0.0,
1297            min_ratio: median,
1298            max_ratio: median,
1299        };
1300
1301        // miscalibration_rate: zero calibrated -> 0.0 (guarded, no div-by-zero).
1302        assert!(stats(0, 5, 1.0).miscalibration_rate().abs() < f64::EPSILON);
1303        assert!((stats(10, 3, 1.0).miscalibration_rate() - 0.3).abs() < 1e-9);
1304        assert!((stats(4, 1, 1.0).miscalibration_rate() - 0.25).abs() < 1e-9);
1305
1306        // No data -> well-calibrated (no evidence of miscalibration).
1307        assert!(CalibrationStats::default().is_well_calibrated());
1308
1309        // Median boundaries are inclusive [0.5, 2.0]; rate must stay under 10%.
1310        assert!(stats(100, 0, 1.0).is_well_calibrated());
1311        assert!(
1312            stats(100, 0, 0.5).is_well_calibrated(),
1313            "median == 0.5 is in range"
1314        );
1315        assert!(
1316            stats(100, 0, 2.0).is_well_calibrated(),
1317            "median == 2.0 is in range"
1318        );
1319        assert!(
1320            !stats(100, 0, 0.49).is_well_calibrated(),
1321            "median below 0.5 fails"
1322        );
1323        assert!(
1324            !stats(100, 0, 2.01).is_well_calibrated(),
1325            "median above 2.0 fails"
1326        );
1327
1328        // The miscalibration-rate threshold is strict (< 0.10): exactly 10% fails.
1329        assert!(
1330            stats(100, 9, 1.0).is_well_calibrated(),
1331            "9% with a good median is ok"
1332        );
1333        assert!(
1334            !stats(100, 10, 1.0).is_well_calibrated(),
1335            "exactly 10% is not ok"
1336        );
1337        assert!(
1338            !stats(100, 50, 1.0).is_well_calibrated(),
1339            "high rate overrides a good median"
1340        );
1341    }
1342
1343    #[test]
1344    fn calibration_stats_display() {
1345        let stats = CalibrationStats {
1346            total_decisions: 10,
1347            calibrated_decisions: 8,
1348            miscalibrated_count: 2,
1349            mean_ratio: 1.5,
1350            median_ratio: 1.2,
1351            stddev_ratio: 0.8,
1352            min_ratio: 0.1,
1353            max_ratio: 3.5,
1354        };
1355        let display = format!("{stats}");
1356        assert!(display.contains("8/10"));
1357        assert!(display.contains("miscalibrated: 2"));
1358    }
1359}
fsqlite_planner/decision_contract.rs

fsqlite_planner/
decision_contract.rs