sqry-db 9.0.22

Salsa-style incremental computation engine for sqry semantic code search
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
//! `sqry-db`: Salsa-style incremental computation engine for sqry.
//!
//! This crate provides a demand-driven memoized query engine with file-level
//! dependency tracking, three-tier cache invalidation, and incremental indexing
//! support. All analysis queries (callers, callees, dependency impact, unused
//! detection, etc.) are expressed as [`DerivedQuery`] implementations whose
//! results are cached in a 64-shard [`ShardedCache`] and automatically
//! invalidated when their input dependencies change.
//!
//! # Architecture
//!
//! ```text
//! ┌─────────────┐      ┌──────────────────┐      ┌──────────────���─┐
//! │  MCP / CLI   │─────▶│     QueryDb      │─────▶│  GraphSnapshot │
//! │  handlers    │      │  (ShardedCache)   │      │  (immutable)   │
//! └─────────────┘      └──────────────────┘      └────���───────────┘
//!                             │                          │
//!                       ┌─────┴──────┐           ┌──────┴───────┐
//!                       │ FileInput  │           │  NodeArena   │
//!                       │   Store    │           │  EdgeStore   │
//!                       └────────────┘           └──────────────┘
//! ```
//!
//! # Three-Tier Invalidation
//!
//! - **Tier 1 — File-level deps**: Every query records the `FileId` of every
//!   node it reads. Invalidated when any recorded file's revision bumps.
//! - **Tier 2 — Global edge revision**: A single `AtomicU64` that increments
//!   when *any* file's edges change. Reverse queries (`callers_of`,
//!   `is_node_in_cycle`) set `TRACKS_EDGE_REVISION = true`.
//! - **Tier 3 — Global metadata revision**: A single `AtomicU64` that
//!   increments when node metadata changes. `find_unused` sets
//!   `TRACKS_METADATA_REVISION = true`.
//!
//! # Usage
//!
//! ```rust,ignore
//! use sqry_db::{QueryDb, QueryDbConfig};
//!
//! let config = QueryDbConfig::default();
//! let db = QueryDb::new(snapshot, config);
//! let result = db.get::<CallersQuery>(&node_id);
//! ```

pub mod cache;
pub mod compaction;
pub mod comparative;
pub mod config;
pub mod dependency;
pub mod input;
pub mod persistence;
pub mod planner;
pub mod queries;
pub mod query;

use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};

use sqry_core::graph::unified::concurrent::GraphSnapshot;

pub use cache::ShardedCache;
pub use comparative::{
    ChangeType, ComparativeQueryDb, DiffOptions, DiffOutput, DiffSummary, NodeChange, NodeLocation,
};
pub use config::QueryDbConfig;
pub use dependency::DependencyRecorderGuard;
pub use input::{FileInput, FileInputStore};
pub use persistence::{
    DERIVED_FORMAT_VERSION, DERIVED_MAGIC, DerivedHeader, DerivedManifest, LoadError, LoadOutcome,
    PersistedEntry, QueryDeps, SkipReason, StagedEntry, load_derived,
};
pub use queries::dispatch::{derived_path, load_derived_opportunistic, make_query_db_cold};
pub use query::{DerivedQuery, QueryKey, QueryRegistry};
// `QueryDb` and `QueryDbMetrics` are defined in this module; re-exported
// at the crate root so callers can `use sqry_db::{QueryDb, QueryDbMetrics}`
// without qualifying the module path.

/// The central incremental computation database.
///
/// `QueryDb` owns the sharded query cache, file-level input store, and global
/// revision counters. All derived queries are evaluated and cached through
/// [`QueryDb::get`].
///
/// # Concurrency
///
/// `QueryDb` is `Send + Sync`. The sharded cache uses `parking_lot::RwLock`
/// per shard (64 shards) for high-concurrency reads. Global revision counters
/// use `AtomicU64` with `Ordering::Acquire` / `Ordering::Release` semantics.
pub struct QueryDb {
    /// Per-file fact sets with revision counters.
    inputs: FileInputStore,
    /// 64-shard query cache.
    cache: ShardedCache,
    /// Global edge revision counter.
    ///
    /// Incremented when *any* file's edges change during `reindex_files`.
    /// Queries with `TRACKS_EDGE_REVISION = true` are invalidated when this
    /// counter advances past their cached `edge_revision` value.
    edge_revision: AtomicU64,
    /// Global node metadata revision counter.
    ///
    /// Incremented when node metadata (visibility, entry-point classification,
    /// node kind) changes. Queries with `TRACKS_METADATA_REVISION = true` are
    /// invalidated when this counter advances.
    metadata_revision: AtomicU64,
    /// Query type registry mapping `TypeId` to shard assignments.
    registry: QueryRegistry,
    /// Reference to the current graph snapshot.
    snapshot: Arc<GraphSnapshot>,
    /// Configuration for cache behavior, compaction thresholds, etc.
    config: QueryDbConfig,
    /// Cumulative cache hits — incremented on every `get()` that returns a
    /// cached value without invoking `Q::execute`.
    ///
    /// Exposed for proof-point tests (Phase 3C DB21 Proof 3) and production
    /// telemetry. Use [`Self::metrics`] to read a coherent snapshot.
    cache_hits: AtomicU64,
    /// Cumulative cache misses — incremented on every `get()` that invokes
    /// `Q::execute` because the entry was missing or its dependency tiers
    /// failed validation.
    cache_misses: AtomicU64,
    /// Guards the cold-load window.
    ///
    /// `true` at construction — exactly one successful `load_derived` call is
    /// permitted per `QueryDb` instance. After a successful load this is set
    /// to `false` (via `Release` ordering). Subsequent calls to `load_derived`
    /// return [`persistence::LoadError::AlreadyLoaded`] immediately, without
    /// reading the file, so double-apply is impossible.
    ///
    /// The field is intentionally `pub(crate)` so that the sibling
    /// `persistence` module can check and flip it atomically.
    pub(crate) cold_load_allowed: AtomicBool,
}

/// Lightweight snapshot of `QueryDb` cache-usage counters.
///
/// Returned by [`QueryDb::metrics`]. Counters are monotonic and never reset
/// during the lifetime of a `QueryDb` — tests should read a baseline before
/// the work under test, run the work, read again, and diff.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct QueryDbMetrics {
    /// Total cache hits served since this `QueryDb` was constructed.
    pub cache_hits: u64,
    /// Total cache misses (including dependency-invalidated entries) served
    /// since this `QueryDb` was constructed.
    pub cache_misses: u64,
}

impl QueryDbMetrics {
    /// Total `get()` calls observed (`cache_hits + cache_misses`).
    #[inline]
    #[must_use]
    pub fn total_gets(&self) -> u64 {
        self.cache_hits.saturating_add(self.cache_misses)
    }
}

impl QueryDb {
    /// Creates a new `QueryDb` backed by the given graph snapshot.
    ///
    /// Every built-in [`DerivedQuery`] is pre-registered so callers of the
    /// planner executor and the relation / cycle / unused queries never have
    /// to hand-register them. Extra query types can still be added via
    /// [`Self::register`].
    #[must_use]
    pub fn new(snapshot: Arc<GraphSnapshot>, config: QueryDbConfig) -> Self {
        let inputs = FileInputStore::from_snapshot(&snapshot);
        let mut db = Self {
            inputs,
            cache: ShardedCache::new(config.shard_count),
            edge_revision: AtomicU64::new(0),
            metadata_revision: AtomicU64::new(0),
            registry: QueryRegistry::new(),
            snapshot,
            config,
            cache_hits: AtomicU64::new(0),
            cache_misses: AtomicU64::new(0),
            cold_load_allowed: AtomicBool::new(true),
        };
        db.register_builtin_queries();
        db
    }

    /// Registers the queries that ship with `sqry-db`.
    ///
    /// Called from [`Self::new`]; exposed as a helper so tests can reuse it
    /// when they set up bespoke `QueryDb` instances.
    fn register_builtin_queries(&mut self) {
        // Foundational queries
        self.registry.register::<queries::SccQuery>();
        self.registry.register::<queries::CondensationQuery>();
        self.registry.register::<queries::ReachabilityQuery>();
        // DB14 relation queries
        self.registry.register::<queries::CallersQuery>();
        self.registry.register::<queries::CalleesQuery>();
        self.registry.register::<queries::ImportsQuery>();
        self.registry.register::<queries::ExportsQuery>();
        self.registry.register::<queries::ReferencesQuery>();
        self.registry.register::<queries::ImplementsQuery>();
        // DB14 analysis queries
        self.registry.register::<queries::CyclesQuery>();
        self.registry.register::<queries::IsInCycleQuery>();
        self.registry.register::<queries::EntryPointsQuery>();
        self.registry
            .register::<queries::ReachableFromEntryPointsQuery>();
        self.registry.register::<queries::UnusedQuery>();
        self.registry.register::<queries::IsNodeUnusedQuery>();
    }

    /// Registers a derived query type for cache routing.
    ///
    /// Must be called once per query type before the first `get` call.
    /// Panics (in debug builds) if the query type is already registered.
    pub fn register<Q: DerivedQuery>(&mut self) {
        self.registry.register::<Q>();
    }

    /// Evaluates a derived query, returning the cached result if valid or
    /// recomputing and caching on miss/invalidation.
    ///
    /// # Cache validation
    ///
    /// A cached result is returned only if ALL applicable tiers pass:
    /// 1. Every `(FileId, revision)` pair in the cached deps matches the
    ///    current file revision in `FileInputStore`.
    /// 2. If `Q::TRACKS_EDGE_REVISION`, the cached `edge_revision` matches
    ///    the current global counter.
    /// 3. If `Q::TRACKS_METADATA_REVISION`, the cached `metadata_revision`
    ///    matches the current global counter.
    pub fn get<Q: DerivedQuery>(&self, key: &Q::Key) -> Q::Value {
        let query_key = QueryKey::new::<Q>(key);
        let shard_idx = self.registry.shard_for::<Q>(self.cache.shard_count());

        // Fast path: typed cached value with valid tiers (warm path).
        if let Some(value) = self
            .cache
            .get_if_valid::<Q::Value>(shard_idx, &query_key, |cached| {
                self.validate_cached_result::<Q>(cached)
            })
        {
            self.cache_hits.fetch_add(1, Ordering::Relaxed);
            return value;
        }

        // Cold-load rehydration path: entry may exist as raw-bytes-only, placed
        // by `ShardedCache::insert_validated` during `load_derived`. If the
        // tiers still validate, decode the raw bytes into `Q::Value` and
        // promote the entry to the typed fast-path for subsequent calls. This
        // is what makes spec §2's "first query after a cold start is free"
        // actually hold: no recomputation is performed when the rehydrated
        // entry is valid.
        if let Some(value) =
            self.cache
                .get_cold_if_valid::<Q::Value>(shard_idx, &query_key, |cached| {
                    self.validate_cached_result::<Q>(cached)
                })
        {
            self.cache_hits.fetch_add(1, Ordering::Relaxed);
            return value;
        }

        // Cache miss or invalidated — recompute
        self.cache_misses.fetch_add(1, Ordering::Relaxed);
        let guard = DependencyRecorderGuard::new();
        let value = Q::execute(key, self, &self.snapshot);
        let file_deps = guard.finish(&self.inputs);

        let edge_rev = if Q::TRACKS_EDGE_REVISION {
            Some(self.edge_revision.load(Ordering::Acquire))
        } else {
            None
        };
        let metadata_rev = if Q::TRACKS_METADATA_REVISION {
            Some(self.metadata_revision.load(Ordering::Acquire))
        } else {
            None
        };

        // insert_query serialises key+value for persistent queries and enforces
        // the max_entry_size_bytes cap. For non-persistent queries it stores
        // the typed value with empty raw bytes. For oversized entries it skips
        // caching entirely (soft skip). The caller's computed value is always
        // returned regardless of whether caching succeeded.
        //
        // Serialisation errors are non-fatal: log a warning and fall back to
        // the bare untyped insert so the entry is at least cache-warm for the
        // current session (just not persistent).
        if let Err(err) = self.cache.insert_query::<Q>(
            shard_idx,
            query_key.clone(),
            key,
            value.clone(),
            file_deps.clone(),
            edge_rev,
            metadata_rev,
            &self.config,
        ) {
            log::warn!(
                "sqry-db: failed to serialise cache entry for query_type_id={:#06x}: {err}; \
                 storing typed value without persistence",
                Q::QUERY_TYPE_ID,
            );
            self.cache.insert(
                shard_idx,
                query_key,
                cache::CachedResult::new(value.clone(), file_deps, edge_rev, metadata_rev),
            );
        }

        value
    }

    /// Returns the current graph snapshot.
    #[inline]
    #[must_use]
    pub fn snapshot(&self) -> &GraphSnapshot {
        &self.snapshot
    }

    /// Returns the current graph snapshot as an `Arc`.
    #[inline]
    #[must_use]
    pub fn snapshot_arc(&self) -> Arc<GraphSnapshot> {
        Arc::clone(&self.snapshot)
    }

    /// Replaces the current snapshot with a new one.
    ///
    /// This does NOT invalidate the cache. Use [`bump_edge_revision`] and/or
    /// [`bump_metadata_revision`] after updating to trigger invalidation on
    /// next access.
    pub fn set_snapshot(&mut self, snapshot: Arc<GraphSnapshot>) {
        self.snapshot = snapshot;
    }

    /// Returns a reference to the file input store.
    #[inline]
    #[must_use]
    pub fn inputs(&self) -> &FileInputStore {
        &self.inputs
    }

    /// Returns a mutable reference to the file input store.
    #[inline]
    pub fn inputs_mut(&mut self) -> &mut FileInputStore {
        &mut self.inputs
    }

    /// Returns the current global edge revision counter.
    #[inline]
    #[must_use]
    pub fn edge_revision(&self) -> u64 {
        self.edge_revision.load(Ordering::Acquire)
    }

    /// Atomically increments the global edge revision counter and returns the
    /// new value.
    pub fn bump_edge_revision(&self) -> u64 {
        self.edge_revision.fetch_add(1, Ordering::Release) + 1
    }

    /// Returns the current global metadata revision counter.
    #[inline]
    #[must_use]
    pub fn metadata_revision(&self) -> u64 {
        self.metadata_revision.load(Ordering::Acquire)
    }

    /// Atomically increments the global metadata revision counter and returns
    /// the new value.
    pub fn bump_metadata_revision(&self) -> u64 {
        self.metadata_revision.fetch_add(1, Ordering::Release) + 1
    }

    /// Returns the database configuration.
    #[inline]
    #[must_use]
    pub fn config(&self) -> &QueryDbConfig {
        &self.config
    }

    /// Returns a snapshot of the cache-usage counters.
    ///
    /// The returned [`QueryDbMetrics`] is a coherent read of the hit and miss
    /// counters (modulo the usual memory-ordering semantics of two separate
    /// atomic reads). The counters are monotonic and never reset — callers
    /// interested in deltas should take a baseline, run the work, and diff.
    ///
    /// Used by Phase 3C DB21 Proof 3 to assert "zero recomputation" after a
    /// cache warm-up, and by production telemetry to emit cache hit rates.
    #[inline]
    #[must_use]
    pub fn metrics(&self) -> QueryDbMetrics {
        QueryDbMetrics {
            cache_hits: self.cache_hits.load(Ordering::Relaxed),
            cache_misses: self.cache_misses.load(Ordering::Relaxed),
        }
    }

    /// Invalidates all cached results unconditionally.
    ///
    /// This is a heavy operation used for full rebuilds.
    pub fn invalidate_all(&self) {
        self.cache.clear_all();
    }

    /// Returns `true` if a cold-load is still permitted for this `QueryDb`.
    ///
    /// `true` at construction. Transitions to `false` after a successful
    /// [`persistence::load_derived`] call. Once `false`, subsequent
    /// `load_derived` calls return
    /// [`persistence::LoadError::AlreadyLoaded`] without reading the file.
    #[inline]
    #[must_use]
    pub fn cold_load_allowed(&self) -> bool {
        self.cold_load_allowed.load(Ordering::Acquire)
    }

    /// Apply a validated cold-load staging result. **Infallible by construction.**
    ///
    /// Pre-conditions enforced by the caller (`load_derived`):
    /// - `header.magic == DERIVED_MAGIC`
    /// - `header.format_version == DERIVED_FORMAT_VERSION`
    /// - `header.snapshot_sha256` matched the caller-supplied SHA
    /// - `staged` is the set of valid entries (unknown-ID entries filtered out
    ///   at validation stage)
    ///
    /// Uses only infallible operations:
    /// - [`AtomicU64::store`] for `edge_revision` / `metadata_revision`
    /// - [`input::FileInputStore::restore_revisions`] (infallible `HashMap::insert`)
    /// - [`cache::ShardedCache::insert_validated`] (infallible `HashMap::insert`)
    pub(crate) fn commit_staged_load(
        &mut self,
        header: persistence::DerivedHeader,
        staged: Vec<persistence::StagedEntry>,
    ) {
        // INVARIANT: all calls below are infallible — see spec §5.7
        // No `?`, no `.map_err(...)`, no `Result`-bearing call is permitted here.
        // If you find yourself wanting to return Err from this function, that is
        // a spec violation — move the fallible work into `load_derived` before
        // the commit boundary.

        // Tier 2: restore global edge revision.
        self.edge_revision
            .store(header.edge_revision, Ordering::Release);

        // Tier 3: restore global metadata revision.
        self.metadata_revision
            .store(header.metadata_revision, Ordering::Release);

        // Tier 1: restore per-file revision counters.
        self.inputs.restore_revisions(&header.file_revisions);

        // Warm the cache with raw-byte entries for each staged result.
        for entry in staged {
            self.cache.insert_validated(
                entry.query_type_id,
                entry.raw_key_bytes.into(),
                entry.raw_result_bytes.into(),
                entry.deps,
            );
        }
    }

    /// Yields all persistent cache entries for the SAVE_PATH persistence unit.
    ///
    /// Delegates to [`ShardedCache::iter_persistent`]. This crate-internal
    /// accessor exists to allow `persistence::save_derived` (a sibling module)
    /// to drive the save loop without exposing the raw `cache` field.
    pub(crate) fn iter_persistent_cache_entries(
        &self,
    ) -> impl Iterator<Item = cache::PersistableEntry> + '_ {
        self.cache.iter_persistent()
    }

    /// Validates a cached result against all applicable dependency tiers.
    fn validate_cached_result<Q: DerivedQuery>(&self, cached: &cache::CachedResult) -> bool {
        // Tier 1: File-level deps
        if !cached.validate_file_deps(&self.inputs) {
            return false;
        }

        // Tier 2: Global edge revision
        if Q::TRACKS_EDGE_REVISION {
            if let Some(cached_rev) = cached.edge_revision() {
                if cached_rev != self.edge_revision.load(Ordering::Acquire) {
                    return false;
                }
            } else {
                // Query tracks edge revision but cache entry doesn't have one
                return false;
            }
        }

        // Tier 3: Global metadata revision
        if Q::TRACKS_METADATA_REVISION {
            if let Some(cached_rev) = cached.metadata_revision() {
                if cached_rev != self.metadata_revision.load(Ordering::Acquire) {
                    return false;
                }
            } else {
                return false;
            }
        }

        true
    }
}

// SAFETY: All internal state is either atomic or behind parking_lot locks.
// The `Arc<GraphSnapshot>` is immutable.
unsafe impl Send for QueryDb {}
unsafe impl Sync for QueryDb {}

#[cfg(test)]
mod tests {
    use super::*;
    use sqry_core::graph::unified::concurrent::CodeGraph;

    #[test]
    fn query_db_is_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<QueryDb>();
    }

    /// DB14 + Codex review (2026-04-15): `QueryDb::new` pre-registers every
    /// built-in `DerivedQuery`. Legacy / test call sites that re-register
    /// the same built-in after `QueryDb::new()` must be a no-op — the
    /// registry count must stay stable so duplicate registration does not
    /// silently alter shard routing or count semantics.
    #[test]
    fn query_db_new_built_in_registration_is_stable_under_repeats() {
        let snapshot = std::sync::Arc::new(CodeGraph::new().snapshot());
        let mut db = QueryDb::new(snapshot, QueryDbConfig::default());

        let count_after_new = db.registry.registered_count();
        // Re-register every DB14-shipped built-in. Each call must be a no-op.
        db.register::<queries::SccQuery>();
        db.register::<queries::CondensationQuery>();
        db.register::<queries::ReachabilityQuery>();
        db.register::<queries::CallersQuery>();
        db.register::<queries::CalleesQuery>();
        db.register::<queries::ImportsQuery>();
        db.register::<queries::ExportsQuery>();
        db.register::<queries::ReferencesQuery>();
        db.register::<queries::ImplementsQuery>();
        db.register::<queries::CyclesQuery>();
        db.register::<queries::IsInCycleQuery>();
        db.register::<queries::EntryPointsQuery>();
        db.register::<queries::ReachableFromEntryPointsQuery>();
        db.register::<queries::UnusedQuery>();
        db.register::<queries::IsNodeUnusedQuery>();

        assert_eq!(
            db.registry.registered_count(),
            count_after_new,
            "repeat register of built-ins must not bump count"
        );
        // Ensure the count is non-zero (i.e. built-ins were actually
        // registered by the constructor — a regression where new() forgets
        // to call register_builtin_queries() would surface here).
        assert!(
            count_after_new >= 15,
            "QueryDb::new must pre-register at least the 15 DB14 built-ins, got {count_after_new}"
        );
    }
}