sqry_db/lib.rs
1//! `sqry-db`: Salsa-style incremental computation engine for sqry.
2//!
3//! This crate provides a demand-driven memoized query engine with file-level
4//! dependency tracking, three-tier cache invalidation, and incremental indexing
5//! support. All analysis queries (callers, callees, dependency impact, unused
6//! detection, etc.) are expressed as [`DerivedQuery`] implementations whose
7//! results are cached in a 64-shard [`ShardedCache`] and automatically
8//! invalidated when their input dependencies change.
9//!
10//! # Architecture
11//!
12//! ```text
13//! ┌─────────────┐ ┌──────────────────┐ ┌──────────────���─┐
14//! │ MCP / CLI │─────▶│ QueryDb │─────▶│ GraphSnapshot │
15//! │ handlers │ │ (ShardedCache) │ │ (immutable) │
16//! └─────────────┘ └──────────────────┘ └────���───────────┘
17//! │ │
18//! ┌─────┴──────┐ ┌──────┴───────┐
19//! │ FileInput │ │ NodeArena │
20//! │ Store │ │ EdgeStore │
21//! └────────────┘ └──────────────┘
22//! ```
23//!
24//! # Three-Tier Invalidation
25//!
26//! - **Tier 1 — File-level deps**: Every query records the `FileId` of every
27//! node it reads. Invalidated when any recorded file's revision bumps.
28//! - **Tier 2 — Global edge revision**: A single `AtomicU64` that increments
29//! when *any* file's edges change. Reverse queries (`callers_of`,
30//! `is_node_in_cycle`) set `TRACKS_EDGE_REVISION = true`.
31//! - **Tier 3 — Global metadata revision**: A single `AtomicU64` that
32//! increments when node metadata changes. `find_unused` sets
33//! `TRACKS_METADATA_REVISION = true`.
34//!
35//! # Usage
36//!
37//! ```rust,ignore
38//! use sqry_db::{QueryDb, QueryDbConfig};
39//!
40//! let config = QueryDbConfig::default();
41//! let db = QueryDb::new(snapshot, config);
42//! let result = db.get::<CallersQuery>(&node_id);
43//! ```
44
45pub mod cache;
46pub mod compaction;
47pub mod comparative;
48pub mod config;
49pub mod dependency;
50pub mod input;
51pub mod persistence;
52pub mod planner;
53pub mod queries;
54pub mod query;
55
56use std::sync::Arc;
57use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
58
59use sqry_core::graph::unified::concurrent::GraphSnapshot;
60
61pub use cache::ShardedCache;
62pub use comparative::{
63 ChangeType, ComparativeQueryDb, DiffOptions, DiffOutput, DiffSummary, NodeChange, NodeLocation,
64};
65pub use config::QueryDbConfig;
66pub use dependency::DependencyRecorderGuard;
67pub use input::{FileInput, FileInputStore};
68pub use persistence::{
69 DERIVED_FORMAT_VERSION, DERIVED_MAGIC, DerivedHeader, DerivedManifest, LoadError, LoadOutcome,
70 PersistedEntry, QueryDeps, SkipReason, StagedEntry, load_derived,
71};
72pub use queries::dispatch::{derived_path, load_derived_opportunistic, make_query_db_cold};
73pub use query::{DerivedQuery, QueryKey, QueryRegistry};
74// `QueryDb` and `QueryDbMetrics` are defined in this module; re-exported
75// at the crate root so callers can `use sqry_db::{QueryDb, QueryDbMetrics}`
76// without qualifying the module path.
77
78/// The central incremental computation database.
79///
80/// `QueryDb` owns the sharded query cache, file-level input store, and global
81/// revision counters. All derived queries are evaluated and cached through
82/// [`QueryDb::get`].
83///
84/// # Concurrency
85///
86/// `QueryDb` is `Send + Sync`. The sharded cache uses `parking_lot::RwLock`
87/// per shard (64 shards) for high-concurrency reads. Global revision counters
88/// use `AtomicU64` with `Ordering::Acquire` / `Ordering::Release` semantics.
89pub struct QueryDb {
90 /// Per-file fact sets with revision counters.
91 inputs: FileInputStore,
92 /// 64-shard query cache.
93 cache: ShardedCache,
94 /// Global edge revision counter.
95 ///
96 /// Incremented when *any* file's edges change during `reindex_files`.
97 /// Queries with `TRACKS_EDGE_REVISION = true` are invalidated when this
98 /// counter advances past their cached `edge_revision` value.
99 edge_revision: AtomicU64,
100 /// Global node metadata revision counter.
101 ///
102 /// Incremented when node metadata (visibility, entry-point classification,
103 /// node kind) changes. Queries with `TRACKS_METADATA_REVISION = true` are
104 /// invalidated when this counter advances.
105 metadata_revision: AtomicU64,
106 /// Query type registry mapping `TypeId` to shard assignments.
107 registry: QueryRegistry,
108 /// Reference to the current graph snapshot.
109 snapshot: Arc<GraphSnapshot>,
110 /// Configuration for cache behavior, compaction thresholds, etc.
111 config: QueryDbConfig,
112 /// Cumulative cache hits — incremented on every `get()` that returns a
113 /// cached value without invoking `Q::execute`.
114 ///
115 /// Exposed for proof-point tests (Phase 3C DB21 Proof 3) and production
116 /// telemetry. Use [`Self::metrics`] to read a coherent snapshot.
117 cache_hits: AtomicU64,
118 /// Cumulative cache misses — incremented on every `get()` that invokes
119 /// `Q::execute` because the entry was missing or its dependency tiers
120 /// failed validation.
121 cache_misses: AtomicU64,
122 /// Guards the cold-load window.
123 ///
124 /// `true` at construction — exactly one successful `load_derived` call is
125 /// permitted per `QueryDb` instance. After a successful load this is set
126 /// to `false` (via `Release` ordering). Subsequent calls to `load_derived`
127 /// return [`persistence::LoadError::AlreadyLoaded`] immediately, without
128 /// reading the file, so double-apply is impossible.
129 ///
130 /// The field is intentionally `pub(crate)` so that the sibling
131 /// `persistence` module can check and flip it atomically.
132 pub(crate) cold_load_allowed: AtomicBool,
133}
134
135/// Lightweight snapshot of `QueryDb` cache-usage counters.
136///
137/// Returned by [`QueryDb::metrics`]. Counters are monotonic and never reset
138/// during the lifetime of a `QueryDb` — tests should read a baseline before
139/// the work under test, run the work, read again, and diff.
140#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
141pub struct QueryDbMetrics {
142 /// Total cache hits served since this `QueryDb` was constructed.
143 pub cache_hits: u64,
144 /// Total cache misses (including dependency-invalidated entries) served
145 /// since this `QueryDb` was constructed.
146 pub cache_misses: u64,
147}
148
149impl QueryDbMetrics {
150 /// Total `get()` calls observed (`cache_hits + cache_misses`).
151 #[inline]
152 #[must_use]
153 pub fn total_gets(&self) -> u64 {
154 self.cache_hits.saturating_add(self.cache_misses)
155 }
156}
157
158impl QueryDb {
159 /// Creates a new `QueryDb` backed by the given graph snapshot.
160 ///
161 /// Every built-in [`DerivedQuery`] is pre-registered so callers of the
162 /// planner executor and the relation / cycle / unused queries never have
163 /// to hand-register them. Extra query types can still be added via
164 /// [`Self::register`].
165 #[must_use]
166 pub fn new(snapshot: Arc<GraphSnapshot>, config: QueryDbConfig) -> Self {
167 let inputs = FileInputStore::from_snapshot(&snapshot);
168 let mut db = Self {
169 inputs,
170 cache: ShardedCache::new(config.shard_count),
171 edge_revision: AtomicU64::new(0),
172 metadata_revision: AtomicU64::new(0),
173 registry: QueryRegistry::new(),
174 snapshot,
175 config,
176 cache_hits: AtomicU64::new(0),
177 cache_misses: AtomicU64::new(0),
178 cold_load_allowed: AtomicBool::new(true),
179 };
180 db.register_builtin_queries();
181 db
182 }
183
184 /// Registers the queries that ship with `sqry-db`.
185 ///
186 /// Called from [`Self::new`]; exposed as a helper so tests can reuse it
187 /// when they set up bespoke `QueryDb` instances.
188 fn register_builtin_queries(&mut self) {
189 // Foundational queries
190 self.registry.register::<queries::SccQuery>();
191 self.registry.register::<queries::CondensationQuery>();
192 self.registry.register::<queries::ReachabilityQuery>();
193 // DB14 relation queries
194 self.registry.register::<queries::CallersQuery>();
195 self.registry.register::<queries::CalleesQuery>();
196 self.registry.register::<queries::ImportsQuery>();
197 self.registry.register::<queries::ExportsQuery>();
198 self.registry.register::<queries::ReferencesQuery>();
199 self.registry.register::<queries::ImplementsQuery>();
200 // DB14 analysis queries
201 self.registry.register::<queries::CyclesQuery>();
202 self.registry.register::<queries::IsInCycleQuery>();
203 self.registry.register::<queries::EntryPointsQuery>();
204 self.registry
205 .register::<queries::ReachableFromEntryPointsQuery>();
206 self.registry.register::<queries::UnusedQuery>();
207 self.registry.register::<queries::IsNodeUnusedQuery>();
208 }
209
210 /// Registers a derived query type for cache routing.
211 ///
212 /// Must be called once per query type before the first `get` call.
213 /// Panics (in debug builds) if the query type is already registered.
214 pub fn register<Q: DerivedQuery>(&mut self) {
215 self.registry.register::<Q>();
216 }
217
218 /// Evaluates a derived query, returning the cached result if valid or
219 /// recomputing and caching on miss/invalidation.
220 ///
221 /// # Cache validation
222 ///
223 /// A cached result is returned only if ALL applicable tiers pass:
224 /// 1. Every `(FileId, revision)` pair in the cached deps matches the
225 /// current file revision in `FileInputStore`.
226 /// 2. If `Q::TRACKS_EDGE_REVISION`, the cached `edge_revision` matches
227 /// the current global counter.
228 /// 3. If `Q::TRACKS_METADATA_REVISION`, the cached `metadata_revision`
229 /// matches the current global counter.
230 pub fn get<Q: DerivedQuery>(&self, key: &Q::Key) -> Q::Value {
231 let query_key = QueryKey::new::<Q>(key);
232 let shard_idx = self.registry.shard_for::<Q>(self.cache.shard_count());
233
234 // Fast path: typed cached value with valid tiers (warm path).
235 if let Some(value) = self
236 .cache
237 .get_if_valid::<Q::Value>(shard_idx, &query_key, |cached| {
238 self.validate_cached_result::<Q>(cached)
239 })
240 {
241 self.cache_hits.fetch_add(1, Ordering::Relaxed);
242 return value;
243 }
244
245 // Cold-load rehydration path: entry may exist as raw-bytes-only, placed
246 // by `ShardedCache::insert_validated` during `load_derived`. If the
247 // tiers still validate, decode the raw bytes into `Q::Value` and
248 // promote the entry to the typed fast-path for subsequent calls. This
249 // is what makes spec §2's "first query after a cold start is free"
250 // actually hold: no recomputation is performed when the rehydrated
251 // entry is valid.
252 if let Some(value) =
253 self.cache
254 .get_cold_if_valid::<Q::Value>(shard_idx, &query_key, |cached| {
255 self.validate_cached_result::<Q>(cached)
256 })
257 {
258 self.cache_hits.fetch_add(1, Ordering::Relaxed);
259 return value;
260 }
261
262 // Cache miss or invalidated — recompute
263 self.cache_misses.fetch_add(1, Ordering::Relaxed);
264 let guard = DependencyRecorderGuard::new();
265 let value = Q::execute(key, self, &self.snapshot);
266 let file_deps = guard.finish(&self.inputs);
267
268 let edge_rev = if Q::TRACKS_EDGE_REVISION {
269 Some(self.edge_revision.load(Ordering::Acquire))
270 } else {
271 None
272 };
273 let metadata_rev = if Q::TRACKS_METADATA_REVISION {
274 Some(self.metadata_revision.load(Ordering::Acquire))
275 } else {
276 None
277 };
278
279 // insert_query serialises key+value for persistent queries and enforces
280 // the max_entry_size_bytes cap. For non-persistent queries it stores
281 // the typed value with empty raw bytes. For oversized entries it skips
282 // caching entirely (soft skip). The caller's computed value is always
283 // returned regardless of whether caching succeeded.
284 //
285 // Serialisation errors are non-fatal: log a warning and fall back to
286 // the bare untyped insert so the entry is at least cache-warm for the
287 // current session (just not persistent).
288 if let Err(err) = self.cache.insert_query::<Q>(
289 shard_idx,
290 query_key.clone(),
291 key,
292 value.clone(),
293 file_deps.clone(),
294 edge_rev,
295 metadata_rev,
296 &self.config,
297 ) {
298 log::warn!(
299 "sqry-db: failed to serialise cache entry for query_type_id={:#06x}: {err}; \
300 storing typed value without persistence",
301 Q::QUERY_TYPE_ID,
302 );
303 self.cache.insert(
304 shard_idx,
305 query_key,
306 cache::CachedResult::new(value.clone(), file_deps, edge_rev, metadata_rev),
307 );
308 }
309
310 value
311 }
312
313 /// Returns the current graph snapshot.
314 #[inline]
315 #[must_use]
316 pub fn snapshot(&self) -> &GraphSnapshot {
317 &self.snapshot
318 }
319
320 /// Returns the current graph snapshot as an `Arc`.
321 #[inline]
322 #[must_use]
323 pub fn snapshot_arc(&self) -> Arc<GraphSnapshot> {
324 Arc::clone(&self.snapshot)
325 }
326
327 /// Replaces the current snapshot with a new one.
328 ///
329 /// This does NOT invalidate the cache. Use [`bump_edge_revision`] and/or
330 /// [`bump_metadata_revision`] after updating to trigger invalidation on
331 /// next access.
332 pub fn set_snapshot(&mut self, snapshot: Arc<GraphSnapshot>) {
333 self.snapshot = snapshot;
334 }
335
336 /// Returns a reference to the file input store.
337 #[inline]
338 #[must_use]
339 pub fn inputs(&self) -> &FileInputStore {
340 &self.inputs
341 }
342
343 /// Returns a mutable reference to the file input store.
344 #[inline]
345 pub fn inputs_mut(&mut self) -> &mut FileInputStore {
346 &mut self.inputs
347 }
348
349 /// Returns the current global edge revision counter.
350 #[inline]
351 #[must_use]
352 pub fn edge_revision(&self) -> u64 {
353 self.edge_revision.load(Ordering::Acquire)
354 }
355
356 /// Atomically increments the global edge revision counter and returns the
357 /// new value.
358 pub fn bump_edge_revision(&self) -> u64 {
359 self.edge_revision.fetch_add(1, Ordering::Release) + 1
360 }
361
362 /// Returns the current global metadata revision counter.
363 #[inline]
364 #[must_use]
365 pub fn metadata_revision(&self) -> u64 {
366 self.metadata_revision.load(Ordering::Acquire)
367 }
368
369 /// Atomically increments the global metadata revision counter and returns
370 /// the new value.
371 pub fn bump_metadata_revision(&self) -> u64 {
372 self.metadata_revision.fetch_add(1, Ordering::Release) + 1
373 }
374
375 /// Returns the database configuration.
376 #[inline]
377 #[must_use]
378 pub fn config(&self) -> &QueryDbConfig {
379 &self.config
380 }
381
382 /// Returns a snapshot of the cache-usage counters.
383 ///
384 /// The returned [`QueryDbMetrics`] is a coherent read of the hit and miss
385 /// counters (modulo the usual memory-ordering semantics of two separate
386 /// atomic reads). The counters are monotonic and never reset — callers
387 /// interested in deltas should take a baseline, run the work, and diff.
388 ///
389 /// Used by Phase 3C DB21 Proof 3 to assert "zero recomputation" after a
390 /// cache warm-up, and by production telemetry to emit cache hit rates.
391 #[inline]
392 #[must_use]
393 pub fn metrics(&self) -> QueryDbMetrics {
394 QueryDbMetrics {
395 cache_hits: self.cache_hits.load(Ordering::Relaxed),
396 cache_misses: self.cache_misses.load(Ordering::Relaxed),
397 }
398 }
399
400 /// Invalidates all cached results unconditionally.
401 ///
402 /// This is a heavy operation used for full rebuilds.
403 pub fn invalidate_all(&self) {
404 self.cache.clear_all();
405 }
406
407 /// Returns `true` if a cold-load is still permitted for this `QueryDb`.
408 ///
409 /// `true` at construction. Transitions to `false` after a successful
410 /// [`persistence::load_derived`] call. Once `false`, subsequent
411 /// `load_derived` calls return
412 /// [`persistence::LoadError::AlreadyLoaded`] without reading the file.
413 #[inline]
414 #[must_use]
415 pub fn cold_load_allowed(&self) -> bool {
416 self.cold_load_allowed.load(Ordering::Acquire)
417 }
418
419 /// Apply a validated cold-load staging result. **Infallible by construction.**
420 ///
421 /// Pre-conditions enforced by the caller (`load_derived`):
422 /// - `header.magic == DERIVED_MAGIC`
423 /// - `header.format_version == DERIVED_FORMAT_VERSION`
424 /// - `header.snapshot_sha256` matched the caller-supplied SHA
425 /// - `staged` is the set of valid entries (unknown-ID entries filtered out
426 /// at validation stage)
427 ///
428 /// Uses only infallible operations:
429 /// - [`AtomicU64::store`] for `edge_revision` / `metadata_revision`
430 /// - [`input::FileInputStore::restore_revisions`] (infallible `HashMap::insert`)
431 /// - [`cache::ShardedCache::insert_validated`] (infallible `HashMap::insert`)
432 pub(crate) fn commit_staged_load(
433 &mut self,
434 header: persistence::DerivedHeader,
435 staged: Vec<persistence::StagedEntry>,
436 ) {
437 // INVARIANT: all calls below are infallible — see spec §5.7
438 // No `?`, no `.map_err(...)`, no `Result`-bearing call is permitted here.
439 // If you find yourself wanting to return Err from this function, that is
440 // a spec violation — move the fallible work into `load_derived` before
441 // the commit boundary.
442
443 // Tier 2: restore global edge revision.
444 self.edge_revision
445 .store(header.edge_revision, Ordering::Release);
446
447 // Tier 3: restore global metadata revision.
448 self.metadata_revision
449 .store(header.metadata_revision, Ordering::Release);
450
451 // Tier 1: restore per-file revision counters.
452 self.inputs.restore_revisions(&header.file_revisions);
453
454 // Warm the cache with raw-byte entries for each staged result.
455 for entry in staged {
456 self.cache.insert_validated(
457 entry.query_type_id,
458 entry.raw_key_bytes.into(),
459 entry.raw_result_bytes.into(),
460 entry.deps,
461 );
462 }
463 }
464
465 /// Yields all persistent cache entries for the SAVE_PATH persistence unit.
466 ///
467 /// Delegates to [`ShardedCache::iter_persistent`]. This crate-internal
468 /// accessor exists to allow `persistence::save_derived` (a sibling module)
469 /// to drive the save loop without exposing the raw `cache` field.
470 pub(crate) fn iter_persistent_cache_entries(
471 &self,
472 ) -> impl Iterator<Item = cache::PersistableEntry> + '_ {
473 self.cache.iter_persistent()
474 }
475
476 /// Validates a cached result against all applicable dependency tiers.
477 fn validate_cached_result<Q: DerivedQuery>(&self, cached: &cache::CachedResult) -> bool {
478 // Tier 1: File-level deps
479 if !cached.validate_file_deps(&self.inputs) {
480 return false;
481 }
482
483 // Tier 2: Global edge revision
484 if Q::TRACKS_EDGE_REVISION {
485 if let Some(cached_rev) = cached.edge_revision() {
486 if cached_rev != self.edge_revision.load(Ordering::Acquire) {
487 return false;
488 }
489 } else {
490 // Query tracks edge revision but cache entry doesn't have one
491 return false;
492 }
493 }
494
495 // Tier 3: Global metadata revision
496 if Q::TRACKS_METADATA_REVISION {
497 if let Some(cached_rev) = cached.metadata_revision() {
498 if cached_rev != self.metadata_revision.load(Ordering::Acquire) {
499 return false;
500 }
501 } else {
502 return false;
503 }
504 }
505
506 true
507 }
508}
509
510// SAFETY: All internal state is either atomic or behind parking_lot locks.
511// The `Arc<GraphSnapshot>` is immutable.
512unsafe impl Send for QueryDb {}
513unsafe impl Sync for QueryDb {}
514
515#[cfg(test)]
516mod tests {
517 use super::*;
518 use sqry_core::graph::unified::concurrent::CodeGraph;
519
520 #[test]
521 fn query_db_is_send_sync() {
522 fn assert_send_sync<T: Send + Sync>() {}
523 assert_send_sync::<QueryDb>();
524 }
525
526 /// DB14 + Codex review (2026-04-15): `QueryDb::new` pre-registers every
527 /// built-in `DerivedQuery`. Legacy / test call sites that re-register
528 /// the same built-in after `QueryDb::new()` must be a no-op — the
529 /// registry count must stay stable so duplicate registration does not
530 /// silently alter shard routing or count semantics.
531 #[test]
532 fn query_db_new_built_in_registration_is_stable_under_repeats() {
533 let snapshot = std::sync::Arc::new(CodeGraph::new().snapshot());
534 let mut db = QueryDb::new(snapshot, QueryDbConfig::default());
535
536 let count_after_new = db.registry.registered_count();
537 // Re-register every DB14-shipped built-in. Each call must be a no-op.
538 db.register::<queries::SccQuery>();
539 db.register::<queries::CondensationQuery>();
540 db.register::<queries::ReachabilityQuery>();
541 db.register::<queries::CallersQuery>();
542 db.register::<queries::CalleesQuery>();
543 db.register::<queries::ImportsQuery>();
544 db.register::<queries::ExportsQuery>();
545 db.register::<queries::ReferencesQuery>();
546 db.register::<queries::ImplementsQuery>();
547 db.register::<queries::CyclesQuery>();
548 db.register::<queries::IsInCycleQuery>();
549 db.register::<queries::EntryPointsQuery>();
550 db.register::<queries::ReachableFromEntryPointsQuery>();
551 db.register::<queries::UnusedQuery>();
552 db.register::<queries::IsNodeUnusedQuery>();
553
554 assert_eq!(
555 db.registry.registered_count(),
556 count_after_new,
557 "repeat register of built-ins must not bump count"
558 );
559 // Ensure the count is non-zero (i.e. built-ins were actually
560 // registered by the constructor — a regression where new() forgets
561 // to call register_builtin_queries() would surface here).
562 assert!(
563 count_after_new >= 15,
564 "QueryDb::new must pre-register at least the 15 DB14 built-ins, got {count_after_new}"
565 );
566 }
567}