Skip to main content

sqry_db/
lib.rs

1//! `sqry-db`: Salsa-style incremental computation engine for sqry.
2//!
3//! This crate provides a demand-driven memoized query engine with file-level
4//! dependency tracking, three-tier cache invalidation, and incremental indexing
5//! support. All analysis queries (callers, callees, dependency impact, unused
6//! detection, etc.) are expressed as [`DerivedQuery`] implementations whose
7//! results are cached in a 64-shard [`ShardedCache`] and automatically
8//! invalidated when their input dependencies change.
9//!
10//! # Architecture
11//!
12//! ```text
13//! ┌─────────────┐      ┌──────────────────┐      ┌──────────────���─┐
14//! │  MCP / CLI   │─────▶│     QueryDb      │─────▶│  GraphSnapshot │
15//! │  handlers    │      │  (ShardedCache)   │      │  (immutable)   │
16//! └─────────────┘      └──────────────────┘      └────���───────────┘
17//!                             │                          │
18//!                       ┌─────┴──────┐           ┌──────┴───────┐
19//!                       │ FileInput  │           │  NodeArena   │
20//!                       │   Store    │           │  EdgeStore   │
21//!                       └────────────┘           └──────────────┘
22//! ```
23//!
24//! # Three-Tier Invalidation
25//!
26//! - **Tier 1 — File-level deps**: Every query records the `FileId` of every
27//!   node it reads. Invalidated when any recorded file's revision bumps.
28//! - **Tier 2 — Global edge revision**: A single `AtomicU64` that increments
29//!   when *any* file's edges change. Reverse queries (`callers_of`,
30//!   `is_node_in_cycle`) set `TRACKS_EDGE_REVISION = true`.
31//! - **Tier 3 — Global metadata revision**: A single `AtomicU64` that
32//!   increments when node metadata changes. `find_unused` sets
33//!   `TRACKS_METADATA_REVISION = true`.
34//!
35//! # Usage
36//!
37//! ```rust,ignore
38//! use sqry_db::{QueryDb, QueryDbConfig};
39//!
40//! let config = QueryDbConfig::default();
41//! let db = QueryDb::new(snapshot, config);
42//! let result = db.get::<CallersQuery>(&node_id);
43//! ```
44
45pub mod cache;
46pub mod compaction;
47pub mod comparative;
48pub mod config;
49pub mod dependency;
50pub mod input;
51pub mod persistence;
52pub mod planner;
53pub mod queries;
54pub mod query;
55
56use std::sync::Arc;
57use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
58
59use sqry_core::graph::unified::concurrent::GraphSnapshot;
60
61pub use cache::ShardedCache;
62pub use comparative::{
63    ChangeType, ComparativeQueryDb, DiffOptions, DiffOutput, DiffSummary, NodeChange, NodeLocation,
64};
65pub use config::QueryDbConfig;
66pub use dependency::DependencyRecorderGuard;
67pub use input::{FileInput, FileInputStore};
68pub use persistence::{
69    DERIVED_FORMAT_VERSION, DERIVED_MAGIC, DerivedHeader, DerivedManifest, LoadError, LoadOutcome,
70    PersistedEntry, QueryDeps, SkipReason, StagedEntry, load_derived,
71};
72pub use queries::dispatch::{derived_path, load_derived_opportunistic, make_query_db_cold};
73pub use query::{DerivedQuery, QueryKey, QueryRegistry};
74// `QueryDb` and `QueryDbMetrics` are defined in this module; re-exported
75// at the crate root so callers can `use sqry_db::{QueryDb, QueryDbMetrics}`
76// without qualifying the module path.
77
78/// The central incremental computation database.
79///
80/// `QueryDb` owns the sharded query cache, file-level input store, and global
81/// revision counters. All derived queries are evaluated and cached through
82/// [`QueryDb::get`].
83///
84/// # Concurrency
85///
86/// `QueryDb` is `Send + Sync`. The sharded cache uses `parking_lot::RwLock`
87/// per shard (64 shards) for high-concurrency reads. Global revision counters
88/// use `AtomicU64` with `Ordering::Acquire` / `Ordering::Release` semantics.
89pub struct QueryDb {
90    /// Per-file fact sets with revision counters.
91    inputs: FileInputStore,
92    /// 64-shard query cache.
93    cache: ShardedCache,
94    /// Global edge revision counter.
95    ///
96    /// Incremented when *any* file's edges change during `reindex_files`.
97    /// Queries with `TRACKS_EDGE_REVISION = true` are invalidated when this
98    /// counter advances past their cached `edge_revision` value.
99    edge_revision: AtomicU64,
100    /// Global node metadata revision counter.
101    ///
102    /// Incremented when node metadata (visibility, entry-point classification,
103    /// node kind) changes. Queries with `TRACKS_METADATA_REVISION = true` are
104    /// invalidated when this counter advances.
105    metadata_revision: AtomicU64,
106    /// Query type registry mapping `TypeId` to shard assignments.
107    registry: QueryRegistry,
108    /// Reference to the current graph snapshot.
109    snapshot: Arc<GraphSnapshot>,
110    /// Configuration for cache behavior, compaction thresholds, etc.
111    config: QueryDbConfig,
112    /// Cumulative cache hits — incremented on every `get()` that returns a
113    /// cached value without invoking `Q::execute`.
114    ///
115    /// Exposed for proof-point tests (Phase 3C DB21 Proof 3) and production
116    /// telemetry. Use [`Self::metrics`] to read a coherent snapshot.
117    cache_hits: AtomicU64,
118    /// Cumulative cache misses — incremented on every `get()` that invokes
119    /// `Q::execute` because the entry was missing or its dependency tiers
120    /// failed validation.
121    cache_misses: AtomicU64,
122    /// Guards the cold-load window.
123    ///
124    /// `true` at construction — exactly one successful `load_derived` call is
125    /// permitted per `QueryDb` instance. After a successful load this is set
126    /// to `false` (via `Release` ordering). Subsequent calls to `load_derived`
127    /// return [`persistence::LoadError::AlreadyLoaded`] immediately, without
128    /// reading the file, so double-apply is impossible.
129    ///
130    /// The field is intentionally `pub(crate)` so that the sibling
131    /// `persistence` module can check and flip it atomically.
132    pub(crate) cold_load_allowed: AtomicBool,
133}
134
135/// Lightweight snapshot of `QueryDb` cache-usage counters.
136///
137/// Returned by [`QueryDb::metrics`]. Counters are monotonic and never reset
138/// during the lifetime of a `QueryDb` — tests should read a baseline before
139/// the work under test, run the work, read again, and diff.
140#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
141pub struct QueryDbMetrics {
142    /// Total cache hits served since this `QueryDb` was constructed.
143    pub cache_hits: u64,
144    /// Total cache misses (including dependency-invalidated entries) served
145    /// since this `QueryDb` was constructed.
146    pub cache_misses: u64,
147}
148
149impl QueryDbMetrics {
150    /// Total `get()` calls observed (`cache_hits + cache_misses`).
151    #[inline]
152    #[must_use]
153    pub fn total_gets(&self) -> u64 {
154        self.cache_hits.saturating_add(self.cache_misses)
155    }
156}
157
158impl QueryDb {
159    /// Creates a new `QueryDb` backed by the given graph snapshot.
160    ///
161    /// Every built-in [`DerivedQuery`] is pre-registered so callers of the
162    /// planner executor and the relation / cycle / unused queries never have
163    /// to hand-register them. Extra query types can still be added via
164    /// [`Self::register`].
165    #[must_use]
166    pub fn new(snapshot: Arc<GraphSnapshot>, config: QueryDbConfig) -> Self {
167        let inputs = FileInputStore::from_snapshot(&snapshot);
168        let mut db = Self {
169            inputs,
170            cache: ShardedCache::new(config.shard_count),
171            edge_revision: AtomicU64::new(0),
172            metadata_revision: AtomicU64::new(0),
173            registry: QueryRegistry::new(),
174            snapshot,
175            config,
176            cache_hits: AtomicU64::new(0),
177            cache_misses: AtomicU64::new(0),
178            cold_load_allowed: AtomicBool::new(true),
179        };
180        db.register_builtin_queries();
181        db
182    }
183
184    /// Registers the queries that ship with `sqry-db`.
185    ///
186    /// Called from [`Self::new`]; exposed as a helper so tests can reuse it
187    /// when they set up bespoke `QueryDb` instances.
188    fn register_builtin_queries(&mut self) {
189        // Foundational queries
190        self.registry.register::<queries::SccQuery>();
191        self.registry.register::<queries::CondensationQuery>();
192        self.registry.register::<queries::ReachabilityQuery>();
193        // DB14 relation queries
194        self.registry.register::<queries::CallersQuery>();
195        self.registry.register::<queries::CalleesQuery>();
196        self.registry.register::<queries::ImportsQuery>();
197        self.registry.register::<queries::ExportsQuery>();
198        self.registry.register::<queries::ReferencesQuery>();
199        self.registry.register::<queries::ImplementsQuery>();
200        // DB14 analysis queries
201        self.registry.register::<queries::CyclesQuery>();
202        self.registry.register::<queries::IsInCycleQuery>();
203        self.registry.register::<queries::EntryPointsQuery>();
204        self.registry
205            .register::<queries::ReachableFromEntryPointsQuery>();
206        self.registry.register::<queries::UnusedQuery>();
207        self.registry.register::<queries::IsNodeUnusedQuery>();
208    }
209
210    /// Registers a derived query type for cache routing.
211    ///
212    /// Must be called once per query type before the first `get` call.
213    /// Panics (in debug builds) if the query type is already registered.
214    pub fn register<Q: DerivedQuery>(&mut self) {
215        self.registry.register::<Q>();
216    }
217
218    /// Evaluates a derived query, returning the cached result if valid or
219    /// recomputing and caching on miss/invalidation.
220    ///
221    /// # Cache validation
222    ///
223    /// A cached result is returned only if ALL applicable tiers pass:
224    /// 1. Every `(FileId, revision)` pair in the cached deps matches the
225    ///    current file revision in `FileInputStore`.
226    /// 2. If `Q::TRACKS_EDGE_REVISION`, the cached `edge_revision` matches
227    ///    the current global counter.
228    /// 3. If `Q::TRACKS_METADATA_REVISION`, the cached `metadata_revision`
229    ///    matches the current global counter.
230    pub fn get<Q: DerivedQuery>(&self, key: &Q::Key) -> Q::Value {
231        let query_key = QueryKey::new::<Q>(key);
232        let shard_idx = self.registry.shard_for::<Q>(self.cache.shard_count());
233
234        // Fast path: typed cached value with valid tiers (warm path).
235        if let Some(value) = self
236            .cache
237            .get_if_valid::<Q::Value>(shard_idx, &query_key, |cached| {
238                self.validate_cached_result::<Q>(cached)
239            })
240        {
241            self.cache_hits.fetch_add(1, Ordering::Relaxed);
242            return value;
243        }
244
245        // Cold-load rehydration path: entry may exist as raw-bytes-only, placed
246        // by `ShardedCache::insert_validated` during `load_derived`. If the
247        // tiers still validate, decode the raw bytes into `Q::Value` and
248        // promote the entry to the typed fast-path for subsequent calls. This
249        // is what makes spec §2's "first query after a cold start is free"
250        // actually hold: no recomputation is performed when the rehydrated
251        // entry is valid.
252        if let Some(value) =
253            self.cache
254                .get_cold_if_valid::<Q::Value>(shard_idx, &query_key, |cached| {
255                    self.validate_cached_result::<Q>(cached)
256                })
257        {
258            self.cache_hits.fetch_add(1, Ordering::Relaxed);
259            return value;
260        }
261
262        // Cache miss or invalidated — recompute
263        self.cache_misses.fetch_add(1, Ordering::Relaxed);
264        let guard = DependencyRecorderGuard::new();
265        let value = Q::execute(key, self, &self.snapshot);
266        let file_deps = guard.finish(&self.inputs);
267
268        let edge_rev = if Q::TRACKS_EDGE_REVISION {
269            Some(self.edge_revision.load(Ordering::Acquire))
270        } else {
271            None
272        };
273        let metadata_rev = if Q::TRACKS_METADATA_REVISION {
274            Some(self.metadata_revision.load(Ordering::Acquire))
275        } else {
276            None
277        };
278
279        // insert_query serialises key+value for persistent queries and enforces
280        // the max_entry_size_bytes cap. For non-persistent queries it stores
281        // the typed value with empty raw bytes. For oversized entries it skips
282        // caching entirely (soft skip). The caller's computed value is always
283        // returned regardless of whether caching succeeded.
284        //
285        // Serialisation errors are non-fatal: log a warning and fall back to
286        // the bare untyped insert so the entry is at least cache-warm for the
287        // current session (just not persistent).
288        if let Err(err) = self.cache.insert_query::<Q>(
289            shard_idx,
290            query_key.clone(),
291            key,
292            value.clone(),
293            file_deps.clone(),
294            edge_rev,
295            metadata_rev,
296            &self.config,
297        ) {
298            log::warn!(
299                "sqry-db: failed to serialise cache entry for query_type_id={:#06x}: {err}; \
300                 storing typed value without persistence",
301                Q::QUERY_TYPE_ID,
302            );
303            self.cache.insert(
304                shard_idx,
305                query_key,
306                cache::CachedResult::new(value.clone(), file_deps, edge_rev, metadata_rev),
307            );
308        }
309
310        value
311    }
312
313    /// Returns the current graph snapshot.
314    #[inline]
315    #[must_use]
316    pub fn snapshot(&self) -> &GraphSnapshot {
317        &self.snapshot
318    }
319
320    /// Returns the current graph snapshot as an `Arc`.
321    #[inline]
322    #[must_use]
323    pub fn snapshot_arc(&self) -> Arc<GraphSnapshot> {
324        Arc::clone(&self.snapshot)
325    }
326
327    /// Replaces the current snapshot with a new one.
328    ///
329    /// This does NOT invalidate the cache. Use [`bump_edge_revision`] and/or
330    /// [`bump_metadata_revision`] after updating to trigger invalidation on
331    /// next access.
332    pub fn set_snapshot(&mut self, snapshot: Arc<GraphSnapshot>) {
333        self.snapshot = snapshot;
334    }
335
336    /// Returns a reference to the file input store.
337    #[inline]
338    #[must_use]
339    pub fn inputs(&self) -> &FileInputStore {
340        &self.inputs
341    }
342
343    /// Returns a mutable reference to the file input store.
344    #[inline]
345    pub fn inputs_mut(&mut self) -> &mut FileInputStore {
346        &mut self.inputs
347    }
348
349    /// Returns the current global edge revision counter.
350    #[inline]
351    #[must_use]
352    pub fn edge_revision(&self) -> u64 {
353        self.edge_revision.load(Ordering::Acquire)
354    }
355
356    /// Atomically increments the global edge revision counter and returns the
357    /// new value.
358    pub fn bump_edge_revision(&self) -> u64 {
359        self.edge_revision.fetch_add(1, Ordering::Release) + 1
360    }
361
362    /// Returns the current global metadata revision counter.
363    #[inline]
364    #[must_use]
365    pub fn metadata_revision(&self) -> u64 {
366        self.metadata_revision.load(Ordering::Acquire)
367    }
368
369    /// Atomically increments the global metadata revision counter and returns
370    /// the new value.
371    pub fn bump_metadata_revision(&self) -> u64 {
372        self.metadata_revision.fetch_add(1, Ordering::Release) + 1
373    }
374
375    /// Returns the database configuration.
376    #[inline]
377    #[must_use]
378    pub fn config(&self) -> &QueryDbConfig {
379        &self.config
380    }
381
382    /// Returns a snapshot of the cache-usage counters.
383    ///
384    /// The returned [`QueryDbMetrics`] is a coherent read of the hit and miss
385    /// counters (modulo the usual memory-ordering semantics of two separate
386    /// atomic reads). The counters are monotonic and never reset — callers
387    /// interested in deltas should take a baseline, run the work, and diff.
388    ///
389    /// Used by Phase 3C DB21 Proof 3 to assert "zero recomputation" after a
390    /// cache warm-up, and by production telemetry to emit cache hit rates.
391    #[inline]
392    #[must_use]
393    pub fn metrics(&self) -> QueryDbMetrics {
394        QueryDbMetrics {
395            cache_hits: self.cache_hits.load(Ordering::Relaxed),
396            cache_misses: self.cache_misses.load(Ordering::Relaxed),
397        }
398    }
399
400    /// Invalidates all cached results unconditionally.
401    ///
402    /// This is a heavy operation used for full rebuilds.
403    pub fn invalidate_all(&self) {
404        self.cache.clear_all();
405    }
406
407    /// Returns `true` if a cold-load is still permitted for this `QueryDb`.
408    ///
409    /// `true` at construction. Transitions to `false` after a successful
410    /// [`persistence::load_derived`] call. Once `false`, subsequent
411    /// `load_derived` calls return
412    /// [`persistence::LoadError::AlreadyLoaded`] without reading the file.
413    #[inline]
414    #[must_use]
415    pub fn cold_load_allowed(&self) -> bool {
416        self.cold_load_allowed.load(Ordering::Acquire)
417    }
418
419    /// Apply a validated cold-load staging result. **Infallible by construction.**
420    ///
421    /// Pre-conditions enforced by the caller (`load_derived`):
422    /// - `header.magic == DERIVED_MAGIC`
423    /// - `header.format_version == DERIVED_FORMAT_VERSION`
424    /// - `header.snapshot_sha256` matched the caller-supplied SHA
425    /// - `staged` is the set of valid entries (unknown-ID entries filtered out
426    ///   at validation stage)
427    ///
428    /// Uses only infallible operations:
429    /// - [`AtomicU64::store`] for `edge_revision` / `metadata_revision`
430    /// - [`input::FileInputStore::restore_revisions`] (infallible `HashMap::insert`)
431    /// - [`cache::ShardedCache::insert_validated`] (infallible `HashMap::insert`)
432    pub(crate) fn commit_staged_load(
433        &mut self,
434        header: persistence::DerivedHeader,
435        staged: Vec<persistence::StagedEntry>,
436    ) {
437        // INVARIANT: all calls below are infallible — see spec §5.7
438        // No `?`, no `.map_err(...)`, no `Result`-bearing call is permitted here.
439        // If you find yourself wanting to return Err from this function, that is
440        // a spec violation — move the fallible work into `load_derived` before
441        // the commit boundary.
442
443        // Tier 2: restore global edge revision.
444        self.edge_revision
445            .store(header.edge_revision, Ordering::Release);
446
447        // Tier 3: restore global metadata revision.
448        self.metadata_revision
449            .store(header.metadata_revision, Ordering::Release);
450
451        // Tier 1: restore per-file revision counters.
452        self.inputs.restore_revisions(&header.file_revisions);
453
454        // Warm the cache with raw-byte entries for each staged result.
455        for entry in staged {
456            self.cache.insert_validated(
457                entry.query_type_id,
458                entry.raw_key_bytes.into(),
459                entry.raw_result_bytes.into(),
460                entry.deps,
461            );
462        }
463    }
464
465    /// Yields all persistent cache entries for the SAVE_PATH persistence unit.
466    ///
467    /// Delegates to [`ShardedCache::iter_persistent`]. This crate-internal
468    /// accessor exists to allow `persistence::save_derived` (a sibling module)
469    /// to drive the save loop without exposing the raw `cache` field.
470    pub(crate) fn iter_persistent_cache_entries(
471        &self,
472    ) -> impl Iterator<Item = cache::PersistableEntry> + '_ {
473        self.cache.iter_persistent()
474    }
475
476    /// Validates a cached result against all applicable dependency tiers.
477    fn validate_cached_result<Q: DerivedQuery>(&self, cached: &cache::CachedResult) -> bool {
478        // Tier 1: File-level deps
479        if !cached.validate_file_deps(&self.inputs) {
480            return false;
481        }
482
483        // Tier 2: Global edge revision
484        if Q::TRACKS_EDGE_REVISION {
485            if let Some(cached_rev) = cached.edge_revision() {
486                if cached_rev != self.edge_revision.load(Ordering::Acquire) {
487                    return false;
488                }
489            } else {
490                // Query tracks edge revision but cache entry doesn't have one
491                return false;
492            }
493        }
494
495        // Tier 3: Global metadata revision
496        if Q::TRACKS_METADATA_REVISION {
497            if let Some(cached_rev) = cached.metadata_revision() {
498                if cached_rev != self.metadata_revision.load(Ordering::Acquire) {
499                    return false;
500                }
501            } else {
502                return false;
503            }
504        }
505
506        true
507    }
508}
509
510// SAFETY: All internal state is either atomic or behind parking_lot locks.
511// The `Arc<GraphSnapshot>` is immutable.
512unsafe impl Send for QueryDb {}
513unsafe impl Sync for QueryDb {}
514
515#[cfg(test)]
516mod tests {
517    use super::*;
518    use sqry_core::graph::unified::concurrent::CodeGraph;
519
520    #[test]
521    fn query_db_is_send_sync() {
522        fn assert_send_sync<T: Send + Sync>() {}
523        assert_send_sync::<QueryDb>();
524    }
525
526    /// DB14 + Codex review (2026-04-15): `QueryDb::new` pre-registers every
527    /// built-in `DerivedQuery`. Legacy / test call sites that re-register
528    /// the same built-in after `QueryDb::new()` must be a no-op — the
529    /// registry count must stay stable so duplicate registration does not
530    /// silently alter shard routing or count semantics.
531    #[test]
532    fn query_db_new_built_in_registration_is_stable_under_repeats() {
533        let snapshot = std::sync::Arc::new(CodeGraph::new().snapshot());
534        let mut db = QueryDb::new(snapshot, QueryDbConfig::default());
535
536        let count_after_new = db.registry.registered_count();
537        // Re-register every DB14-shipped built-in. Each call must be a no-op.
538        db.register::<queries::SccQuery>();
539        db.register::<queries::CondensationQuery>();
540        db.register::<queries::ReachabilityQuery>();
541        db.register::<queries::CallersQuery>();
542        db.register::<queries::CalleesQuery>();
543        db.register::<queries::ImportsQuery>();
544        db.register::<queries::ExportsQuery>();
545        db.register::<queries::ReferencesQuery>();
546        db.register::<queries::ImplementsQuery>();
547        db.register::<queries::CyclesQuery>();
548        db.register::<queries::IsInCycleQuery>();
549        db.register::<queries::EntryPointsQuery>();
550        db.register::<queries::ReachableFromEntryPointsQuery>();
551        db.register::<queries::UnusedQuery>();
552        db.register::<queries::IsNodeUnusedQuery>();
553
554        assert_eq!(
555            db.registry.registered_count(),
556            count_after_new,
557            "repeat register of built-ins must not bump count"
558        );
559        // Ensure the count is non-zero (i.e. built-ins were actually
560        // registered by the constructor — a regression where new() forgets
561        // to call register_builtin_queries() would surface here).
562        assert!(
563            count_after_new >= 15,
564            "QueryDb::new must pre-register at least the 15 DB14 built-ins, got {count_after_new}"
565        );
566    }
567}