Skip to main content

sqry_db/
query.rs

1//! Derived query trait and `TypeId`-based query registry.
2//!
3//! All analysis queries implement [`DerivedQuery`]. The registry maps each
4//! query type's `TypeId` to a shard assignment in the [`ShardedCache`].
5//! Registration is explicit — no proc macros.
6
7use std::any::TypeId;
8use std::collections::HashMap;
9use std::hash::{Hash, Hasher};
10
11use sqry_core::graph::unified::concurrent::GraphSnapshot;
12
13use crate::QueryDb;
14
15/// A derived query whose result is memoized in the [`ShardedCache`].
16///
17/// Concrete queries implement this trait. `QueryDb::get::<Q>(key)` checks
18/// the cache, validates all applicable dependency tiers, and either returns
19/// the cached value or calls `Q::execute`, records dependencies, and caches.
20///
21/// # Dependency tracking
22///
23/// During `execute`, the query should call [`record_file_dep`](crate::dependency::record_file_dep)
24/// for every `FileId` it reads from. The thread-local recorder collects these
25/// and pairs them with revision numbers after execution completes.
26///
27/// # Tier flags
28///
29/// - `TRACKS_EDGE_REVISION`: Set to `true` for queries that answer "who points
30///   at X" (callers, cycle membership, reachability). Invalidated when ANY
31///   file's edges change.
32/// - `TRACKS_METADATA_REVISION`: Set to `true` for queries that read node
33///   metadata (entry-point classification for `find_unused`). Invalidated when
34///   node metadata changes without edge changes.
35pub trait DerivedQuery: Send + Sync + 'static {
36    /// The input key for this query.
37    ///
38    /// The `serde::Serialize` bound is required by the PN3 persistence layer:
39    /// [`ShardedCache::insert_query`] serialises the key to raw bytes at insert
40    /// time so that the key can be round-tripped to disk without re-running the
41    /// query. All built-in query keys already implement `Serialize`; downstream
42    /// or test keys using primitive types (`u32`, `String`, etc.) also satisfy
43    /// this bound automatically.
44    type Key: Hash + Eq + Clone + Send + serde::Serialize + 'static;
45    /// The output value. Must be `Clone` for cache retrieval.
46    ///
47    /// The `serde::Serialize` bound is required by the PN3 persistence layer
48    /// so that warm-path writes can be saved to disk.
49    /// The `serde::de::DeserializeOwned` bound is required by the PN3 cold-load
50    /// path: when a rehydrated entry is first found by a typed `get::<Q>`, the
51    /// raw result bytes must be decoded back into `Q::Value` before they can
52    /// be returned to the caller. Without this bound, "first query after cold
53    /// start is free" would degrade into "first query misses, second is free",
54    /// which violates spec §2.
55    /// Non-serialisable value types must use `PERSISTENT = false` to opt out
56    /// of raw-byte retention.
57    type Value: Clone + Send + Sync + serde::Serialize + serde::de::DeserializeOwned + 'static;
58
59    /// Stable on-disk discriminator for PN3 derived-cache persistence.
60    ///
61    /// This `u32` is the key written to `.sqry/graph/derived.sqry` to
62    /// identify the query type across process restarts. Unlike
63    /// `TypeId::of::<Self>()`, which is process-local and not stable
64    /// across compiler versions, this ID is fixed at authoring time and
65    /// **must never change or be reused** once any derived cache file
66    /// containing it has been written.
67    ///
68    /// # Reserved ranges
69    ///
70    /// - `0x0000` — invalid / unknown. Must never appear on disk.
71    /// - `0x0001..=0x000F` — built-in queries (15 assigned).
72    /// - `0x0010..=0x0FFF` — reserved for additional built-ins.
73    /// - `0x1000..=0xFFFF` — reserved for downstream / future built-ins.
74    ///
75    /// See `sqry_db::queries::type_ids` for the canonical constants.
76    /// There is no default value: every implementation must set this
77    /// explicitly to prevent accidental ID collisions.
78    const QUERY_TYPE_ID: u32;
79
80    /// Whether this query's `Value` satisfies the serde bounds required
81    /// for PN3 cold-start persistence.
82    ///
83    /// Set to `false` for queries whose `Value` type cannot (or should
84    /// not) be serialized to disk. When `false`, the PN3 persistence
85    /// layer skips this query entirely and will recompute it on demand
86    /// after a cold start.
87    ///
88    /// The default is `true`; implementations opt out by overriding.
89    const PERSISTENT: bool = true;
90
91    /// Whether this query depends on the global edge topology.
92    ///
93    /// When `true`, the cached result is invalidated whenever the global edge
94    /// revision counter advances (even if the specific files the query read
95    /// haven't changed). This handles the "missing negative dependency"
96    /// problem: a new file could add an edge that the query never saw.
97    const TRACKS_EDGE_REVISION: bool = false;
98
99    /// Whether this query depends on node metadata.
100    ///
101    /// When `true`, the cached result is invalidated whenever the global
102    /// metadata revision counter advances.
103    const TRACKS_METADATA_REVISION: bool = false;
104
105    /// Computes the query result.
106    ///
107    /// Implementations MUST call [`record_file_dep`](crate::dependency::record_file_dep)
108    /// for every `FileId` they read from the snapshot.
109    fn execute(key: &Self::Key, db: &QueryDb, snapshot: &GraphSnapshot) -> Self::Value;
110}
111
112/// A type-erased query cache key combining the query type's stable
113/// `QUERY_TYPE_ID` discriminator and the hash of the postcard encoding of the
114/// input key.
115///
116/// Key equality is **build/process-independent**: both components derive from
117/// values that are stable across restarts, enabling PN3's `load_derived` to
118/// rehydrate entries directly into the same cache slots warm-path
119/// `insert_query` and `get` use.
120#[derive(Clone, Eq, PartialEq)]
121pub struct QueryKey {
122    /// `Q::QUERY_TYPE_ID` (the stable u32 discriminator from spec §5.1) widened
123    /// to `u64`. Shared between warm-path `QueryKey::new::<Q>` and cold-path
124    /// `ShardedCache::insert_validated`.
125    type_hash: u64,
126    /// Hash of `postcard::to_allocvec(&key)` bytes. Shared between warm-path
127    /// and cold-path for the same reason.
128    key_hash: u64,
129}
130
131impl QueryKey {
132    /// Creates a new query key from a concrete query type and input key.
133    ///
134    /// Both components of the key are computed from **stable, process-independent**
135    /// sources so that entries rehydrated by PN3's cold-load path
136    /// (`ShardedCache::insert_validated`, which has no typed `Q` and works from
137    /// raw bytes alone) live in the SAME key space as warm-path entries. This
138    /// is what makes "first query after a cold start is free" actually hold:
139    ///
140    /// - `type_hash` is `Q::QUERY_TYPE_ID as u64` (the stable on-disk
141    ///   discriminator defined in spec §5.1), **not** `TypeId::of::<Q>()` which
142    ///   is build/process-local.
143    /// - `key_hash` is a hash of the postcard encoding of `key`, matching what
144    ///   `ShardedCache::insert_query` already stores as `raw_key_bytes` and what
145    ///   `ShardedCache::insert_validated` hashes at cold-load time.
146    ///
147    /// Deterministic serialization of the key is guaranteed by the
148    /// `serde::Serialize` bound on `DerivedQuery::Key`; a panic on serialization
149    /// failure would indicate a serde derive bug and is treated as a
150    /// non-recoverable programmer error here.
151    pub fn new<Q: DerivedQuery>(key: &Q::Key) -> Self {
152        let type_hash = u64::from(Q::QUERY_TYPE_ID);
153        let key_hash = Self::hash_serialized_key(key);
154        Self {
155            type_hash,
156            key_hash,
157        }
158    }
159
160    /// Creates a raw query key for testing purposes.
161    #[doc(hidden)]
162    #[must_use]
163    pub fn from_raw(type_hash: u64, key_hash: u64) -> Self {
164        Self {
165            type_hash,
166            key_hash,
167        }
168    }
169
170    /// Hash the postcard encoding of a key. This matches the key-space used by
171    /// `ShardedCache::insert_validated` at cold-load time: both paths hash the
172    /// same byte sequence, so a typed `get::<Q>(&key)` issued immediately after
173    /// `load_derived` will find the rehydrated entry on the **first** call.
174    fn hash_serialized_key<K: serde::Serialize>(key: &K) -> u64 {
175        let bytes = postcard::to_allocvec(key).expect(
176            "DerivedQuery::Key requires serde::Serialize to be infallible; \
177             postcard::to_allocvec must not fail",
178        );
179        let mut hasher = std::hash::DefaultHasher::new();
180        bytes.hash(&mut hasher);
181        hasher.finish()
182    }
183}
184
185impl Hash for QueryKey {
186    fn hash<H: Hasher>(&self, state: &mut H) {
187        self.type_hash.hash(state);
188        self.key_hash.hash(state);
189    }
190}
191
192impl std::fmt::Debug for QueryKey {
193    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
194        f.debug_struct("QueryKey")
195            .field("type_hash", &format!("{:#018x}", self.type_hash))
196            .field("key_hash", &format!("{:#018x}", self.key_hash))
197            .finish()
198    }
199}
200
201/// Registry mapping query types to shard assignments.
202///
203/// No macro magic. Queries are registered at startup via
204/// `QueryDb::register::<MyQuery>()`. The registry maps `TypeId` to a shard
205/// index derived from the `TypeId`'s hash.
206pub struct QueryRegistry {
207    /// Maps `TypeId` → shard index. Used to route queries to their assigned shard.
208    assignments: HashMap<TypeId, usize>,
209}
210
211impl QueryRegistry {
212    /// Creates an empty registry.
213    #[must_use]
214    pub fn new() -> Self {
215        Self {
216            assignments: HashMap::new(),
217        }
218    }
219
220    /// Registers a query type for cache routing.
221    ///
222    /// Idempotent: re-registering the same query type is a no-op. DB14
223    /// relies on this so the built-in queries pre-registered by
224    /// [`QueryDb::new`][crate::QueryDb::new] do not conflict with legacy
225    /// call-sites (or tests) that also call [`QueryDb::register`].
226    pub fn register<Q: DerivedQuery>(&mut self) {
227        let tid = TypeId::of::<Q>();
228        // Shard assignment is deferred until shard_for() is called,
229        // but we record the TypeId so registered_count() stays accurate.
230        // The actual shard index is computed from the TypeId hash modulo
231        // the shard count.
232        self.assignments.entry(tid).or_insert(0);
233    }
234
235    /// Returns the shard index for a query type.
236    ///
237    /// The shard index is deterministic: `u64::from(Q::QUERY_TYPE_ID) &
238    /// (shard_count - 1)`. This ensures the same query type always maps to
239    /// the same shard **across process boundaries** — critical for PN3
240    /// cold-load, where `ShardedCache::insert_validated` places rehydrated
241    /// entries into the exact shard a later typed `QueryDb::get::<Q>` will
242    /// probe. `shard_count` is required to be a power of two.
243    #[must_use]
244    pub fn shard_for<Q: DerivedQuery>(&self, shard_count: usize) -> usize {
245        Self::shard_for_query_type_id(Q::QUERY_TYPE_ID, shard_count)
246    }
247
248    /// Type-erased shard-routing helper. Given a `query_type_id` (equivalent to
249    /// `Q::QUERY_TYPE_ID`), returns the shard index. Used by PN3 cold-load
250    /// (`ShardedCache::insert_validated`) where the concrete `Q` is not known.
251    #[must_use]
252    pub fn shard_for_query_type_id(query_type_id: u32, shard_count: usize) -> usize {
253        let mask = (shard_count - 1) as u64;
254        (u64::from(query_type_id) & mask) as usize
255    }
256
257    /// Returns the number of registered query types.
258    #[must_use]
259    pub fn registered_count(&self) -> usize {
260        self.assignments.len()
261    }
262}
263
264impl Default for QueryRegistry {
265    fn default() -> Self {
266        Self::new()
267    }
268}
269
270#[cfg(test)]
271mod tests {
272    use super::*;
273
274    // Test query type for unit testing
275    struct TestQuery;
276
277    impl DerivedQuery for TestQuery {
278        type Key = u32;
279        type Value = String;
280        const QUERY_TYPE_ID: u32 = 0xF001;
281
282        fn execute(key: &u32, _db: &QueryDb, _snapshot: &GraphSnapshot) -> String {
283            format!("result_{key}")
284        }
285    }
286
287    struct EdgeTrackingQuery;
288
289    impl DerivedQuery for EdgeTrackingQuery {
290        type Key = u32;
291        type Value = Vec<u32>;
292        const QUERY_TYPE_ID: u32 = 0xF002;
293        const TRACKS_EDGE_REVISION: bool = true;
294
295        fn execute(_key: &u32, _db: &QueryDb, _snapshot: &GraphSnapshot) -> Vec<u32> {
296            vec![]
297        }
298    }
299
300    #[test]
301    fn query_key_equality() {
302        let k1 = QueryKey::new::<TestQuery>(&42);
303        let k2 = QueryKey::new::<TestQuery>(&42);
304        let k3 = QueryKey::new::<TestQuery>(&99);
305
306        assert_eq!(k1, k2);
307        assert_ne!(k1, k3);
308    }
309
310    #[test]
311    fn query_key_different_types_differ() {
312        let k1 = QueryKey::new::<TestQuery>(&42);
313        let k2 = QueryKey::new::<EdgeTrackingQuery>(&42);
314        assert_ne!(
315            k1, k2,
316            "different query types should produce different keys"
317        );
318    }
319
320    #[test]
321    fn registry_shard_assignment_deterministic() {
322        let reg = QueryRegistry::new();
323        let s1 = reg.shard_for::<TestQuery>(64);
324        let s2 = reg.shard_for::<TestQuery>(64);
325        assert_eq!(s1, s2);
326        assert!(s1 < 64);
327    }
328
329    #[test]
330    fn registry_register_and_count() {
331        let mut reg = QueryRegistry::new();
332        assert_eq!(reg.registered_count(), 0);
333        reg.register::<TestQuery>();
334        assert_eq!(reg.registered_count(), 1);
335        reg.register::<EdgeTrackingQuery>();
336        assert_eq!(reg.registered_count(), 2);
337    }
338
339    /// DB14 + Codex review (2026-04-15): repeat registration of the same
340    /// query type is now a no-op so that legacy / test call sites that
341    /// invoke `db.register::<Q>()` after the constructor's built-in
342    /// pre-registration do not trip a debug assert. Verify that:
343    ///  * count stays stable across repeats,
344    ///  * `shard_for` keeps returning the same deterministic index.
345    #[test]
346    fn registry_register_is_idempotent() {
347        let mut reg = QueryRegistry::new();
348        reg.register::<TestQuery>();
349        let before_shard = reg.shard_for::<TestQuery>(64);
350        let before_count = reg.registered_count();
351
352        reg.register::<TestQuery>();
353        reg.register::<TestQuery>();
354        reg.register::<TestQuery>();
355
356        assert_eq!(
357            reg.registered_count(),
358            before_count,
359            "duplicate register::<Q>() must not bump registered_count"
360        );
361        assert_eq!(
362            reg.shard_for::<TestQuery>(64),
363            before_shard,
364            "duplicate register::<Q>() must not change shard routing"
365        );
366    }
367}