sqry_daemon/workspace/hook.rs
1//! `SqrydHook` — post-publish persistence hook.
2//!
3//! Phase 6c of the sqryd plan, hardened by PF03A (corrective program
4//! 2026-05-07). Every successful publish triggers a best-effort write
5//! of the derived-analysis cache (`.sqry/graph/derived.sqry`) via
6//! `sqry_db::persistence::save_derived`, on a background tokio task
7//! with a configurable timeout. Errors and timeouts are logged at
8//! WARN and absorbed — they never fail the query/publish path.
9//!
10//! This module defines the [`SqrydHook`] trait, the [`NoOpHook`] default
11//! used for tests and embedded callers, and the production
12//! [`QueryDbHook`] that the `sqryd` binary installs at startup
13//! (PF03B).
14//!
15//! ## PF03A architectural decisions
16//!
17//! ### Crate / feature boundary
18//!
19//! `sqry-daemon` depends on `sqry-db` unconditionally (added in PF03A).
20//! Earlier comments suggested gating the dependency behind a `sqry-db-hook`
21//! Cargo feature so embedders could opt out of the writer; PF03A discards
22//! that gating because the corrective program (see
23//! `docs/development/generational-analysis-platform/priority-followups/03_IMPLEMENTATION_PLAN.md`
24//! §A2) demands the production hook be present in every production
25//! sqryd build, not opt-in. Embedders that want the no-persistence
26//! behaviour pass [`noop_hook`] or any custom impl explicitly into
27//! [`super::WorkspaceManager::set_hook`].
28//!
29//! ### Snapshot SHA timing (PF03A decision A, corrected by PF09)
30//!
31//! [`QueryDbHook::on_publish`] first writes the published graph to the
32//! canonical `<workspace_root>/.sqry/graph/snapshot.sqry` path, then
33//! hashes that file, reloads it, warms a bounded query inventory over the
34//! reloaded snapshot, and saves `derived.sqry` with the same SHA. That
35//! makes the snapshot file the single verified identity source for both
36//! `DerivedHeader.snapshot_sha256` and the persisted query entries.
37//! A stale or missing pre-existing `snapshot.sqry` is overwritten by the
38//! published graph before the derived cache is produced.
39//!
40//! ### Async lifetime / ownership (PF03A decision B)
41//!
//! The hook is invoked with an owned `Arc<CodeGraph>` (cloned from the
//! published workspace inside `WorkspaceManager`). The spawned task
//! moves that `Arc` into its blocking closure and serialises the graph
//! to `snapshot.sqry`. The [`GraphSnapshot`] handed to the temporary
//! [`sqry_db::QueryDb`] is taken from the graph *reloaded from that
//! file*, not from the published `Arc`, and is wrapped in its own `Arc`.
//! The strong reference held by the blocking closure keeps the published
//! graph alive until the blocking body returns, independent of any
//! concurrent workspace `unload`, eviction, or replacement publish.
//! A timeout only stops the wrapper from waiting: the blocking body is
//! not cancelled and keeps its `Arc` until it finishes on its own. Once
//! the closure drops, the `Arc` is released; if the manager has already
//! evicted the workspace the data is freed at that point. There is no
//! path that can leave the spawned task reading freed memory.
53//!
54//! ### Failure isolation
55//!
56//! All work runs under [`spawn_hook`], which wraps the future in
57//! [`tokio::time::timeout`] and logs both error and timeout outcomes
58//! at WARN. The publish/query path never observes the result — the
59//! caller does not await the spawned task.
60//!
61//! The hook runs on the current tokio runtime via `tokio::spawn`; the
62//! publish call site does not await it. The default timeout is taken
63//! from `DaemonConfig::rebuild_drain_timeout_ms` (5 s by default),
64//! clampable by the call site if a tighter ceiling is desired.
65
66use std::{path::Path, sync::Arc, time::Duration};
67
68use sqry_core::graph::CodeGraph;
69use sqry_db::queries::{CalleesQuery, CallersQuery, RelationKey};
70use tracing::warn;
71
/// Upper bound on the number of distinct symbol names collected by
/// `warm_persistent_queries` before the derived cache is saved. Each
/// collected name is used for one callers query and one callees query.
const MAX_DERIVED_WARMUP_SYMBOLS: usize = 64;
73
74/// Signature for a post-publish persistence hook.
75///
76/// Called from [`super::WorkspaceManager::publish_and_retain`]
77/// *after* the admission commit has succeeded, with the published
78/// `Arc<CodeGraph>` and the workspace root path. The hook is
79/// expected to return immediately — any actual IO should be
80/// spawned on a tokio task via [`spawn_hook`] or equivalent —
81/// because `publish_and_retain` is a sync critical section.
82pub trait SqrydHook: Send + Sync + std::fmt::Debug {
83 /// Notify the hook that a fresh graph has been published for
84 /// `workspace_root`. Implementations should NOT block; they
85 /// should spawn a background task and return.
86 fn on_publish(&self, workspace_root: &Path, graph: Arc<CodeGraph>);
87}
88
89impl<T: SqrydHook + ?Sized> SqrydHook for Arc<T> {
90 fn on_publish(&self, workspace_root: &Path, graph: Arc<CodeGraph>) {
91 (**self).on_publish(workspace_root, graph);
92 }
93}
94
/// Hook that ignores every publish.
///
/// Serves unit tests and acts as the Phase 6c default when no production
/// hook has been wired. It logs nothing and performs no work.
#[derive(Debug, Default, Clone, Copy)]
pub struct NoOpHook;
100
101impl SqrydHook for NoOpHook {
102 fn on_publish(&self, _workspace_root: &Path, _graph: Arc<CodeGraph>) {
103 // deliberately empty
104 }
105}
106
107/// Shared handle to the active hook. The manager stores an
108/// `ArcSwap<Arc<dyn SqrydHook>>` so Task 9 can install the
109/// production hook after the daemon boots (once the sqry-db
110/// `QueryDb` is built), and unit tests can install a recording
111/// hook at construction time.
112pub type SharedHook = Arc<dyn SqrydHook>;
113
114/// Convenience constructor for [`NoOpHook`] as a [`SharedHook`].
115#[must_use]
116pub fn noop_hook() -> SharedHook {
117 Arc::new(NoOpHook)
118}
119
120/// Spawn an async persistence task with the configured timeout.
121///
122/// The task's result is never awaited by the caller. Errors and
123/// timeouts are logged at WARN; the query path is unaffected.
124///
125/// This helper is public so the production
126/// [`super::manager::WorkspaceManager`] and custom `SqrydHook`
127/// impls can share the same timeout-and-absorb pattern.
128pub fn spawn_hook<F, Fut, E>(
129 timeout: Duration,
130 workspace_root: std::path::PathBuf,
131 task_label: &'static str,
132 fut_factory: F,
133) where
134 F: FnOnce() -> Fut + Send + 'static,
135 Fut: std::future::Future<Output = Result<(), E>> + Send + 'static,
136 E: std::fmt::Display + Send + 'static,
137{
138 tokio::spawn(async move {
139 let fut = fut_factory();
140 match tokio::time::timeout(timeout, fut).await {
141 Ok(Ok(())) => {}
142 Ok(Err(err)) => {
143 warn!(
144 task = task_label,
145 workspace = %workspace_root.display(),
146 error = %err,
147 "sqryd hook {task_label} failed (absorbed; query path continues)",
148 );
149 }
150 Err(_elapsed) => {
151 let timeout_ms = u64::try_from(timeout.as_millis()).unwrap_or(u64::MAX);
152 warn!(
153 task = task_label,
154 workspace = %workspace_root.display(),
155 timeout_ms,
156 "sqryd hook {task_label} timed out (absorbed; query path continues)",
157 );
158 }
159 }
160 });
161}
162
163// ---------------------------------------------------------------------------
164// QueryDbHook — production post-publish hook.
165//
166// PF03A introduces this type. PF03B (`sqryd` startup) calls
167// `WorkspaceManager::set_hook(QueryDbHook::new(timeout))` so every published
168// graph triggers a `sqry_db::persistence::save_derived` call against the
169// canonical `<workspace_root>/.sqry/graph/derived.sqry` path. Failures are
170// logged at WARN and absorbed; the publish path is never blocked.
171// ---------------------------------------------------------------------------
172
173/// Production [`SqrydHook`] backed by `sqry_db::persistence::save_derived`.
174///
175/// See the module-level docs for the snapshot-SHA-timing and lifetime
176/// decisions (PF03A A and B). The hook is parameterised by:
177///
178/// - `timeout` — wall-clock cap applied via [`spawn_hook`] /
179/// [`tokio::time::timeout`]. Defaults to `DaemonConfig::rebuild_drain_timeout_ms`
180/// (5 s) when constructed via [`Self::new`].
181/// - `query_db_config` — passed straight to [`sqry_db::QueryDb::new`] and
182/// used by [`sqry_db::derived_path`] to compute the target file. The
183/// default value points at `derived.sqry` and uses the standard
184/// per-entry size cap; production sqryd should generally accept the
185/// default.
186#[derive(Debug, Clone)]
187pub struct QueryDbHook {
188 timeout: Duration,
189 query_db_config: sqry_db::QueryDbConfig,
190}
191
192impl QueryDbHook {
193 /// Construct a hook with the supplied timeout and the default
194 /// [`sqry_db::QueryDbConfig`].
195 ///
196 /// PF03B's startup path uses this constructor with
197 /// `DaemonConfig::rebuild_drain_timeout_ms`.
198 #[must_use]
199 pub fn new(timeout: Duration) -> Arc<Self> {
200 Arc::new(Self {
201 timeout,
202 query_db_config: sqry_db::QueryDbConfig::default(),
203 })
204 }
205
206 /// Construct a hook with a caller-provided [`sqry_db::QueryDbConfig`].
207 ///
208 /// Reserved for callers that need to override the default
209 /// `derived_persistence_filename` or per-entry size cap. Production
210 /// sqryd should normally use [`Self::new`].
211 #[must_use]
212 pub fn with_query_db_config(
213 timeout: Duration,
214 query_db_config: sqry_db::QueryDbConfig,
215 ) -> Arc<Self> {
216 Arc::new(Self {
217 timeout,
218 query_db_config,
219 })
220 }
221
222 /// Returns the configured wall-clock timeout. Exposed for tests and
223 /// observability surfaces.
224 #[must_use]
225 pub fn timeout(&self) -> Duration {
226 self.timeout
227 }
228}
229
230impl SqrydHook for QueryDbHook {
231 fn on_publish(&self, workspace_root: &Path, graph: Arc<CodeGraph>) {
232 let timeout = self.timeout;
233 let query_db_config = self.query_db_config.clone();
234 let workspace_root_owned = workspace_root.to_path_buf();
235
236 // Move owned values into the spawned task. spawn_hook wraps the
237 // future in `tokio::time::timeout` and absorbs errors / timeouts.
238 spawn_hook::<_, _, anyhow::Error>(
239 timeout,
240 workspace_root_owned.clone(),
241 "query-db-save-derived",
242 move || {
243 let workspace_root = workspace_root_owned;
244 let graph = graph;
245 let query_db_config = query_db_config;
246 async move { run_save_derived(workspace_root, graph, query_db_config).await }
247 },
248 );
249 }
250}
251
252/// Body of the production hook: persist the published graph to the canonical
253/// snapshot path, reload that exact on-disk snapshot, warm a bounded inventory
254/// of persistent relation queries, and persist the derived cache via
255/// `save_derived`.
256///
257/// All filesystem-touching operations run under [`tokio::task::spawn_blocking`]
258/// so the runtime never blocks on synchronous IO.
259async fn run_save_derived(
260 workspace_root: std::path::PathBuf,
261 graph: Arc<CodeGraph>,
262 query_db_config: sqry_db::QueryDbConfig,
263) -> anyhow::Result<()> {
264 tokio::task::spawn_blocking(move || {
265 run_save_derived_blocking(&workspace_root, &graph, query_db_config)
266 })
267 .await
268 .map_err(|join_err| anyhow::anyhow!("spawn_blocking(query-db-save-derived) join: {join_err}"))?
269}
270
271fn run_save_derived_blocking(
272 workspace_root: &Path,
273 graph: &CodeGraph,
274 query_db_config: sqry_db::QueryDbConfig,
275) -> anyhow::Result<()> {
276 let graph_dir = workspace_root.join(".sqry").join("graph");
277 let snapshot_path = graph_dir.join("snapshot.sqry");
278 std::fs::create_dir_all(&graph_dir)?;
279
280 sqry_core::graph::unified::persistence::save_to_path(graph, &snapshot_path)?;
281 let sha = sqry_db::persistence::compute_file_sha256(&snapshot_path).map_err(|io_err| {
282 anyhow::anyhow!("compute_file_sha256({}): {io_err}", snapshot_path.display())
283 })?;
284
285 let persisted_graph =
286 sqry_core::graph::unified::persistence::load_from_path(&snapshot_path, None)?;
287 let snapshot_arc = Arc::new(persisted_graph.snapshot());
288 let db = sqry_db::QueryDb::new(snapshot_arc, query_db_config);
289 let warmed_entries = warm_persistent_queries(&db);
290 tracing::debug!(
291 workspace = %workspace_root.display(),
292 warmed_entries,
293 "QueryDbHook: warmed persistent query entries before derived-cache save"
294 );
295
296 let derived_path = sqry_db::derived_path(workspace_root, db.config());
297 sqry_db::persistence::save_derived(&db, sha, &derived_path, workspace_root)?;
298
299 Ok(())
300}
301
302fn warm_persistent_queries(db: &sqry_db::QueryDb) -> usize {
303 let mut symbol_names = std::collections::BTreeSet::new();
304 for (_node_id, node) in db.snapshot().iter_nodes() {
305 if node.is_unified_loser() {
306 continue;
307 }
308 if let Some(name) = db.snapshot().strings().resolve(node.name) {
309 symbol_names.insert(name.to_string());
310 }
311 if let Some(qualified_name_id) = node.qualified_name
312 && let Some(qualified_name) = db.snapshot().strings().resolve(qualified_name_id)
313 {
314 symbol_names.insert(qualified_name.to_string());
315 }
316 if symbol_names.len() >= MAX_DERIVED_WARMUP_SYMBOLS {
317 break;
318 }
319 }
320
321 let mut warmed_entries = 0usize;
322 for symbol_name in symbol_names {
323 let key = RelationKey::exact(symbol_name);
324 let _ = db.get::<CallersQuery>(&key);
325 let _ = db.get::<CalleesQuery>(&key);
326 warmed_entries += 2;
327 }
328 warmed_entries
329}
330
331/// Recording hook used by unit tests to observe hook invocations
332/// without exercising the real persistence path.
333#[doc(hidden)]
334#[derive(Debug, Default)]
335pub struct RecordingHook {
336 pub invocations: parking_lot::Mutex<Vec<std::path::PathBuf>>,
337}
338
339impl RecordingHook {
340 #[must_use]
341 pub fn new() -> Arc<Self> {
342 Arc::new(Self::default())
343 }
344
345 #[must_use]
346 pub fn invocation_count(&self) -> usize {
347 self.invocations.lock().len()
348 }
349
350 #[must_use]
351 pub fn invocation_roots(&self) -> Vec<std::path::PathBuf> {
352 self.invocations.lock().clone()
353 }
354}
355
356impl SqrydHook for RecordingHook {
357 fn on_publish(&self, workspace_root: &Path, _graph: Arc<CodeGraph>) {
358 self.invocations.lock().push(workspace_root.to_path_buf());
359 }
360}
361
#[cfg(test)]
mod tests {
    use super::*;

    /// Polls every 20 ms until `path` exists or `deadline` has elapsed.
    /// Returns whether the path exists at the end of the wait, so slow CI
    /// runners get the full window while fast runs finish immediately.
    async fn wait_for_path(path: &Path, deadline: Duration) -> bool {
        let give_up_at = std::time::Instant::now() + deadline;
        while std::time::Instant::now() < give_up_at {
            if path.exists() {
                return true;
            }
            tokio::time::sleep(Duration::from_millis(20)).await;
        }
        path.exists()
    }

    #[test]
    fn noop_hook_compiles_through_shared_dispatch() {
        let hook: SharedHook = noop_hook();
        let graph = Arc::new(CodeGraph::new());
        hook.on_publish(Path::new("/repos/example"), graph);
    }

    #[test]
    fn recording_hook_captures_invocations_in_order() {
        let hook = RecordingHook::new();
        let graph = Arc::new(CodeGraph::new());
        hook.on_publish(Path::new("/repos/a"), Arc::clone(&graph));
        hook.on_publish(Path::new("/repos/b"), Arc::clone(&graph));
        assert_eq!(hook.invocation_count(), 2);
        let roots = hook.invocation_roots();
        assert_eq!(roots[0], Path::new("/repos/a"));
        assert_eq!(roots[1], Path::new("/repos/b"));
    }

    #[tokio::test]
    async fn spawn_hook_absorbs_error() {
        // Hook returns Err; timeout wrapper logs at WARN and
        // absorbs. Success criterion: the spawned task completes
        // without panic.
        spawn_hook::<_, _, &'static str>(
            Duration::from_millis(100),
            std::path::PathBuf::from("/repos/example"),
            "test-hook",
            || async { Err("simulated failure") },
        );
        // Give the spawned task time to run.
        tokio::time::sleep(Duration::from_millis(50)).await;
    }

    #[tokio::test]
    async fn spawn_hook_absorbs_timeout() {
        // The future outlives the 10 ms timeout; the wrapper must log and
        // absorb rather than propagate anything to the caller.
        spawn_hook::<_, _, &'static str>(
            Duration::from_millis(10),
            std::path::PathBuf::from("/repos/example"),
            "test-hook",
            || async {
                tokio::time::sleep(Duration::from_secs(1)).await;
                Ok(())
            },
        );
        tokio::time::sleep(Duration::from_millis(50)).await;
    }

    // ---------------------------------------------------------------------
    // PF03A regression coverage for the production QueryDbHook.
    // ---------------------------------------------------------------------

    /// Construction smoke test: the production hook builds with the default
    /// config and reports the requested timeout.
    #[test]
    fn pf03a_query_db_hook_constructs_with_requested_timeout() {
        let hook = QueryDbHook::new(Duration::from_millis(1234));
        assert_eq!(hook.timeout(), Duration::from_millis(1234));
    }

    /// `with_query_db_config` accepts a custom [`sqry_db::QueryDbConfig`]
    /// and threads it through unchanged.
    #[test]
    fn pf03a_query_db_hook_accepts_custom_config() {
        let cfg = sqry_db::QueryDbConfig::default();
        let hook = QueryDbHook::with_query_db_config(Duration::from_millis(50), cfg);
        // The hook holds an owned config; the timeout accessor proves the
        // construction path completed.
        assert_eq!(hook.timeout(), Duration::from_millis(50));
    }

    /// PF09 correction: when the canonical snapshot file is absent, the
    /// hook writes the published graph there first and then binds
    /// derived.sqry to that freshly-written snapshot identity.
    #[tokio::test]
    async fn pf09_query_db_hook_no_snapshot_file_writes_snapshot_and_derived() {
        let workspace = tempfile::tempdir().expect("tempdir");
        let hook = QueryDbHook::new(Duration::from_secs(2));
        let graph = Arc::new(CodeGraph::new());

        // Create the .sqry/graph/ directory but no snapshot.sqry file —
        // mirrors a publish that hasn't yet flushed to disk.
        let graph_dir = workspace.path().join(".sqry").join("graph");
        std::fs::create_dir_all(&graph_dir).unwrap();

        hook.on_publish(workspace.path(), graph);

        let snapshot = graph_dir.join("snapshot.sqry");
        let derived = graph_dir.join("derived.sqry");

        // derived.sqry is written last, so poll for it instead of sleeping
        // a fixed interval; snapshot.sqry must then already be present.
        let derived_written = wait_for_path(&derived, Duration::from_secs(5)).await;
        assert!(
            snapshot.exists(),
            "PF09: hook must write the published snapshot when it is absent (got {})",
            snapshot.display()
        );
        assert!(
            derived_written,
            "PF09: hook must create derived.sqry after writing snapshot.sqry (got {})",
            derived.display()
        );
    }

    /// PF03A failure isolation: when the canonical snapshot path cannot be
    /// written or hashed (here: the path has been replaced by a directory),
    /// the hook absorbs the error and never panics. The publish path must
    /// keep running.
    #[tokio::test]
    async fn pf03a_query_db_hook_absorbs_save_failure() {
        let workspace = tempfile::tempdir().expect("tempdir");
        let hook = QueryDbHook::new(Duration::from_secs(1));
        let graph = Arc::new(CodeGraph::new());

        // Make `snapshot.sqry` a directory. The hook writes the snapshot
        // and then hashes it; whichever of those steps first trips over the
        // directory fails, and `save_derived` is never reached. Either way,
        // the hook returns without panicking.
        let snap_dir = workspace.path().join(".sqry").join("graph");
        std::fs::create_dir_all(&snap_dir).unwrap();
        std::fs::create_dir_all(snap_dir.join("snapshot.sqry")).unwrap();

        hook.on_publish(workspace.path(), graph);
        tokio::time::sleep(Duration::from_millis(100)).await;

        // No panic = success. derived.sqry must not exist (the hook never
        // got far enough to write it).
        let derived = snap_dir.join("derived.sqry");
        assert!(
            !derived.exists(),
            "PF03A: a hashing failure must not leave a partially-written derived.sqry"
        );
    }

    /// PF03A/PF09 end-to-end happy path: with a published graph, the hook
    /// writes snapshot.sqry and then derived.sqry to canonical paths before
    /// its timeout fires.
    #[tokio::test]
    async fn pf03a_query_db_hook_writes_derived_sqry_when_snapshot_present() {
        let workspace = tempfile::tempdir().expect("tempdir");
        let snap_dir = workspace.path().join(".sqry").join("graph");
        std::fs::create_dir_all(&snap_dir).unwrap();

        let hook = QueryDbHook::new(Duration::from_secs(5));
        let graph = Arc::new(CodeGraph::new());
        hook.on_publish(workspace.path(), graph);

        // Poll for the derived file with a generous deadline so slow CI
        // runners don't false-negative.
        let derived = snap_dir.join("derived.sqry");
        assert!(
            wait_for_path(&derived, Duration::from_secs(5)).await,
            "PF03A: hook must write derived.sqry to {} within 5s",
            derived.display()
        );

        // Sanity-check the file is non-empty and starts with the derived
        // magic bytes — proves it actually went through `save_derived`.
        let bytes = std::fs::read(&derived).unwrap();
        assert!(bytes.len() >= sqry_db::DERIVED_MAGIC.len());
        assert_eq!(
            &bytes[..sqry_db::DERIVED_MAGIC.len()],
            sqry_db::DERIVED_MAGIC,
            "derived.sqry must start with SQRY_DERIVED_V02 magic"
        );
    }
}