sqry_db/query.rs
1//! Derived query trait and `TypeId`-based query registry.
2//!
3//! All analysis queries implement [`DerivedQuery`]. The registry maps each
4//! query type's `TypeId` to a shard assignment in the [`ShardedCache`].
5//! Registration is explicit — no proc macros.
6
7use std::any::TypeId;
8use std::collections::HashMap;
9use std::hash::{Hash, Hasher};
10
11use sqry_core::graph::unified::concurrent::GraphSnapshot;
12
13use crate::QueryDb;
14
15/// A derived query whose result is memoized in the [`ShardedCache`].
16///
17/// Concrete queries implement this trait. `QueryDb::get::<Q>(key)` checks
18/// the cache, validates all applicable dependency tiers, and either returns
19/// the cached value or calls `Q::execute`, records dependencies, and caches.
20///
21/// # Dependency tracking
22///
23/// During `execute`, the query should call [`record_file_dep`](crate::dependency::record_file_dep)
24/// for every `FileId` it reads from. The thread-local recorder collects these
25/// and pairs them with revision numbers after execution completes.
26///
27/// # Tier flags
28///
29/// - `TRACKS_EDGE_REVISION`: Set to `true` for queries that answer "who points
30/// at X" (callers, cycle membership, reachability). Invalidated when ANY
31/// file's edges change.
32/// - `TRACKS_METADATA_REVISION`: Set to `true` for queries that read node
33/// metadata (entry-point classification for `find_unused`). Invalidated when
34/// node metadata changes without edge changes.
35pub trait DerivedQuery: Send + Sync + 'static {
36 /// The input key for this query.
37 ///
38 /// The `serde::Serialize` bound is required by the PN3 persistence layer:
39 /// [`ShardedCache::insert_query`] serialises the key to raw bytes at insert
40 /// time so that the key can be round-tripped to disk without re-running the
41 /// query. All built-in query keys already implement `Serialize`; downstream
42 /// or test keys using primitive types (`u32`, `String`, etc.) also satisfy
43 /// this bound automatically.
44 type Key: Hash + Eq + Clone + Send + serde::Serialize + 'static;
45 /// The output value. Must be `Clone` for cache retrieval.
46 ///
47 /// The `serde::Serialize` bound is required by the PN3 persistence layer
48 /// so that warm-path writes can be saved to disk.
49 /// The `serde::de::DeserializeOwned` bound is required by the PN3 cold-load
50 /// path: when a rehydrated entry is first found by a typed `get::<Q>`, the
51 /// raw result bytes must be decoded back into `Q::Value` before they can
52 /// be returned to the caller. Without this bound, "first query after cold
53 /// start is free" would degrade into "first query misses, second is free",
54 /// which violates spec §2.
55 /// Non-serialisable value types must use `PERSISTENT = false` to opt out
56 /// of raw-byte retention.
57 type Value: Clone + Send + Sync + serde::Serialize + serde::de::DeserializeOwned + 'static;
58
59 /// Stable on-disk discriminator for PN3 derived-cache persistence.
60 ///
61 /// This `u32` is the key written to `.sqry/graph/derived.sqry` to
62 /// identify the query type across process restarts. Unlike
63 /// `TypeId::of::<Self>()`, which is process-local and not stable
64 /// across compiler versions, this ID is fixed at authoring time and
65 /// **must never change or be reused** once any derived cache file
66 /// containing it has been written.
67 ///
68 /// # Reserved ranges
69 ///
70 /// - `0x0000` — invalid / unknown. Must never appear on disk.
71 /// - `0x0001..=0x000F` — built-in queries (15 assigned).
72 /// - `0x0010..=0x0FFF` — reserved for additional built-ins.
73 /// - `0x1000..=0xFFFF` — reserved for downstream / future built-ins.
74 ///
75 /// See `sqry_db::queries::type_ids` for the canonical constants.
76 /// There is no default value: every implementation must set this
77 /// explicitly to prevent accidental ID collisions.
78 const QUERY_TYPE_ID: u32;
79
80 /// Whether this query's `Value` satisfies the serde bounds required
81 /// for PN3 cold-start persistence.
82 ///
83 /// Set to `false` for queries whose `Value` type cannot (or should
84 /// not) be serialized to disk. When `false`, the PN3 persistence
85 /// layer skips this query entirely and will recompute it on demand
86 /// after a cold start.
87 ///
88 /// The default is `true`; implementations opt out by overriding.
89 const PERSISTENT: bool = true;
90
91 /// Whether this query depends on the global edge topology.
92 ///
93 /// When `true`, the cached result is invalidated whenever the global edge
94 /// revision counter advances (even if the specific files the query read
95 /// haven't changed). This handles the "missing negative dependency"
96 /// problem: a new file could add an edge that the query never saw.
97 const TRACKS_EDGE_REVISION: bool = false;
98
99 /// Whether this query depends on node metadata.
100 ///
101 /// When `true`, the cached result is invalidated whenever the global
102 /// metadata revision counter advances.
103 const TRACKS_METADATA_REVISION: bool = false;
104
105 /// Computes the query result.
106 ///
107 /// Implementations MUST call [`record_file_dep`](crate::dependency::record_file_dep)
108 /// for every `FileId` they read from the snapshot.
109 fn execute(key: &Self::Key, db: &QueryDb, snapshot: &GraphSnapshot) -> Self::Value;
110}
111
112/// A type-erased query cache key combining the query type's stable
113/// `QUERY_TYPE_ID` discriminator and the hash of the postcard encoding of the
114/// input key.
115///
116/// Key equality is **build/process-independent**: both components derive from
117/// values that are stable across restarts, enabling PN3's `load_derived` to
118/// rehydrate entries directly into the same cache slots warm-path
119/// `insert_query` and `get` use.
120#[derive(Clone, Eq, PartialEq)]
121pub struct QueryKey {
122 /// `Q::QUERY_TYPE_ID` (the stable u32 discriminator from spec §5.1) widened
123 /// to `u64`. Shared between warm-path `QueryKey::new::<Q>` and cold-path
124 /// `ShardedCache::insert_validated`.
125 type_hash: u64,
126 /// Hash of `postcard::to_allocvec(&key)` bytes. Shared between warm-path
127 /// and cold-path for the same reason.
128 key_hash: u64,
129}
130
131impl QueryKey {
132 /// Creates a new query key from a concrete query type and input key.
133 ///
134 /// Both components of the key are computed from **stable, process-independent**
135 /// sources so that entries rehydrated by PN3's cold-load path
136 /// (`ShardedCache::insert_validated`, which has no typed `Q` and works from
137 /// raw bytes alone) live in the SAME key space as warm-path entries. This
138 /// is what makes "first query after a cold start is free" actually hold:
139 ///
140 /// - `type_hash` is `Q::QUERY_TYPE_ID as u64` (the stable on-disk
141 /// discriminator defined in spec §5.1), **not** `TypeId::of::<Q>()` which
142 /// is build/process-local.
143 /// - `key_hash` is a hash of the postcard encoding of `key`, matching what
144 /// `ShardedCache::insert_query` already stores as `raw_key_bytes` and what
145 /// `ShardedCache::insert_validated` hashes at cold-load time.
146 ///
147 /// Deterministic serialization of the key is guaranteed by the
148 /// `serde::Serialize` bound on `DerivedQuery::Key`; a panic on serialization
149 /// failure would indicate a serde derive bug and is treated as a
150 /// non-recoverable programmer error here.
151 pub fn new<Q: DerivedQuery>(key: &Q::Key) -> Self {
152 let type_hash = u64::from(Q::QUERY_TYPE_ID);
153 let key_hash = Self::hash_serialized_key(key);
154 Self {
155 type_hash,
156 key_hash,
157 }
158 }
159
160 /// Creates a raw query key for testing purposes.
161 #[doc(hidden)]
162 #[must_use]
163 pub fn from_raw(type_hash: u64, key_hash: u64) -> Self {
164 Self {
165 type_hash,
166 key_hash,
167 }
168 }
169
170 /// Hash the postcard encoding of a key. This matches the key-space used by
171 /// `ShardedCache::insert_validated` at cold-load time: both paths hash the
172 /// same byte sequence, so a typed `get::<Q>(&key)` issued immediately after
173 /// `load_derived` will find the rehydrated entry on the **first** call.
174 fn hash_serialized_key<K: serde::Serialize>(key: &K) -> u64 {
175 let bytes = postcard::to_allocvec(key).expect(
176 "DerivedQuery::Key requires serde::Serialize to be infallible; \
177 postcard::to_allocvec must not fail",
178 );
179 let mut hasher = std::hash::DefaultHasher::new();
180 bytes.hash(&mut hasher);
181 hasher.finish()
182 }
183}
184
185impl Hash for QueryKey {
186 fn hash<H: Hasher>(&self, state: &mut H) {
187 self.type_hash.hash(state);
188 self.key_hash.hash(state);
189 }
190}
191
192impl std::fmt::Debug for QueryKey {
193 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
194 f.debug_struct("QueryKey")
195 .field("type_hash", &format!("{:#018x}", self.type_hash))
196 .field("key_hash", &format!("{:#018x}", self.key_hash))
197 .finish()
198 }
199}
200
201/// Registry mapping query types to shard assignments.
202///
203/// No macro magic. Queries are registered at startup via
204/// `QueryDb::register::<MyQuery>()`. The registry maps `TypeId` to a shard
205/// index derived from the `TypeId`'s hash.
206pub struct QueryRegistry {
207 /// Maps `TypeId` → shard index. Used to route queries to their assigned shard.
208 assignments: HashMap<TypeId, usize>,
209}
210
211impl QueryRegistry {
212 /// Creates an empty registry.
213 #[must_use]
214 pub fn new() -> Self {
215 Self {
216 assignments: HashMap::new(),
217 }
218 }
219
220 /// Registers a query type for cache routing.
221 ///
222 /// Idempotent: re-registering the same query type is a no-op. DB14
223 /// relies on this so the built-in queries pre-registered by
224 /// [`QueryDb::new`][crate::QueryDb::new] do not conflict with legacy
225 /// call-sites (or tests) that also call [`QueryDb::register`].
226 pub fn register<Q: DerivedQuery>(&mut self) {
227 let tid = TypeId::of::<Q>();
228 // Shard assignment is deferred until shard_for() is called,
229 // but we record the TypeId so registered_count() stays accurate.
230 // The actual shard index is computed from the TypeId hash modulo
231 // the shard count.
232 self.assignments.entry(tid).or_insert(0);
233 }
234
235 /// Returns the shard index for a query type.
236 ///
237 /// The shard index is deterministic: `u64::from(Q::QUERY_TYPE_ID) &
238 /// (shard_count - 1)`. This ensures the same query type always maps to
239 /// the same shard **across process boundaries** — critical for PN3
240 /// cold-load, where `ShardedCache::insert_validated` places rehydrated
241 /// entries into the exact shard a later typed `QueryDb::get::<Q>` will
242 /// probe. `shard_count` is required to be a power of two.
243 #[must_use]
244 pub fn shard_for<Q: DerivedQuery>(&self, shard_count: usize) -> usize {
245 Self::shard_for_query_type_id(Q::QUERY_TYPE_ID, shard_count)
246 }
247
248 /// Type-erased shard-routing helper. Given a `query_type_id` (equivalent to
249 /// `Q::QUERY_TYPE_ID`), returns the shard index. Used by PN3 cold-load
250 /// (`ShardedCache::insert_validated`) where the concrete `Q` is not known.
251 #[must_use]
252 pub fn shard_for_query_type_id(query_type_id: u32, shard_count: usize) -> usize {
253 let mask = (shard_count - 1) as u64;
254 (u64::from(query_type_id) & mask) as usize
255 }
256
257 /// Returns the number of registered query types.
258 #[must_use]
259 pub fn registered_count(&self) -> usize {
260 self.assignments.len()
261 }
262}
263
264impl Default for QueryRegistry {
265 fn default() -> Self {
266 Self::new()
267 }
268}
269
270#[cfg(test)]
271mod tests {
272 use super::*;
273
274 // Test query type for unit testing
275 struct TestQuery;
276
277 impl DerivedQuery for TestQuery {
278 type Key = u32;
279 type Value = String;
280 const QUERY_TYPE_ID: u32 = 0xF001;
281
282 fn execute(key: &u32, _db: &QueryDb, _snapshot: &GraphSnapshot) -> String {
283 format!("result_{key}")
284 }
285 }
286
287 struct EdgeTrackingQuery;
288
289 impl DerivedQuery for EdgeTrackingQuery {
290 type Key = u32;
291 type Value = Vec<u32>;
292 const QUERY_TYPE_ID: u32 = 0xF002;
293 const TRACKS_EDGE_REVISION: bool = true;
294
295 fn execute(_key: &u32, _db: &QueryDb, _snapshot: &GraphSnapshot) -> Vec<u32> {
296 vec![]
297 }
298 }
299
300 #[test]
301 fn query_key_equality() {
302 let k1 = QueryKey::new::<TestQuery>(&42);
303 let k2 = QueryKey::new::<TestQuery>(&42);
304 let k3 = QueryKey::new::<TestQuery>(&99);
305
306 assert_eq!(k1, k2);
307 assert_ne!(k1, k3);
308 }
309
310 #[test]
311 fn query_key_different_types_differ() {
312 let k1 = QueryKey::new::<TestQuery>(&42);
313 let k2 = QueryKey::new::<EdgeTrackingQuery>(&42);
314 assert_ne!(
315 k1, k2,
316 "different query types should produce different keys"
317 );
318 }
319
320 #[test]
321 fn registry_shard_assignment_deterministic() {
322 let reg = QueryRegistry::new();
323 let s1 = reg.shard_for::<TestQuery>(64);
324 let s2 = reg.shard_for::<TestQuery>(64);
325 assert_eq!(s1, s2);
326 assert!(s1 < 64);
327 }
328
329 #[test]
330 fn registry_register_and_count() {
331 let mut reg = QueryRegistry::new();
332 assert_eq!(reg.registered_count(), 0);
333 reg.register::<TestQuery>();
334 assert_eq!(reg.registered_count(), 1);
335 reg.register::<EdgeTrackingQuery>();
336 assert_eq!(reg.registered_count(), 2);
337 }
338
339 /// DB14 + Codex review (2026-04-15): repeat registration of the same
340 /// query type is now a no-op so that legacy / test call sites that
341 /// invoke `db.register::<Q>()` after the constructor's built-in
342 /// pre-registration do not trip a debug assert. Verify that:
343 /// * count stays stable across repeats,
344 /// * `shard_for` keeps returning the same deterministic index.
345 #[test]
346 fn registry_register_is_idempotent() {
347 let mut reg = QueryRegistry::new();
348 reg.register::<TestQuery>();
349 let before_shard = reg.shard_for::<TestQuery>(64);
350 let before_count = reg.registered_count();
351
352 reg.register::<TestQuery>();
353 reg.register::<TestQuery>();
354 reg.register::<TestQuery>();
355
356 assert_eq!(
357 reg.registered_count(),
358 before_count,
359 "duplicate register::<Q>() must not bump registered_count"
360 );
361 assert_eq!(
362 reg.shard_for::<TestQuery>(64),
363 before_shard,
364 "duplicate register::<Q>() must not change shard routing"
365 );
366 }
367}