Skip to main content

sqry_daemon/workspace/
hook.rs

1//! `SqrydHook` — post-publish persistence hook.
2//!
3//! Phase 6c of the sqryd plan, hardened by PF03A (corrective program
4//! 2026-05-07). Every successful publish triggers a best-effort write
5//! of the derived-analysis cache (`.sqry/graph/derived.sqry`) via
6//! `sqry_db::persistence::save_derived`, on a background tokio task
7//! with a configurable timeout. Errors and timeouts are logged at
8//! WARN and absorbed — they never fail the query/publish path.
9//!
10//! This module defines the [`SqrydHook`] trait, the [`NoOpHook`] default
11//! used for tests and embedded callers, and the production
12//! [`QueryDbHook`] that the `sqryd` binary installs at startup
13//! (PF03B).
14//!
15//! ## PF03A architectural decisions
16//!
17//! ### Crate / feature boundary
18//!
19//! `sqry-daemon` depends on `sqry-db` unconditionally (added in PF03A).
20//! Earlier comments suggested gating the dependency behind a `sqry-db-hook`
21//! Cargo feature so embedders could opt out of the writer; PF03A discards
22//! that gating because the corrective program (see
23//! `docs/development/generational-analysis-platform/priority-followups/03_IMPLEMENTATION_PLAN.md`
24//! §A2) demands the production hook be present in every production
25//! sqryd build, not opt-in. Embedders that want the no-persistence
26//! behaviour pass [`noop_hook`] or any custom impl explicitly into
27//! [`super::WorkspaceManager::set_hook`].
28//!
29//! ### Snapshot SHA timing (PF03A decision A, corrected by PF09)
30//!
31//! [`QueryDbHook::on_publish`] first writes the published graph to the
32//! canonical `<workspace_root>/.sqry/graph/snapshot.sqry` path, then
33//! hashes that file, reloads it, warms a bounded query inventory over the
34//! reloaded snapshot, and saves `derived.sqry` with the same SHA. That
35//! makes the snapshot file the single verified identity source for both
36//! `DerivedHeader.snapshot_sha256` and the persisted query entries.
37//! A stale or missing pre-existing `snapshot.sqry` is overwritten by the
38//! published graph before the derived cache is produced.
39//!
//! ### Async lifetime / ownership (PF03A decision B)
//!
//! The hook is invoked with an owned `Arc<CodeGraph>` (cloned from the
//! published workspace inside `WorkspaceManager`). The spawned task
//! moves that `Arc` into its closure and uses it to write the canonical
//! `snapshot.sqry`; the temporary [`sqry_db::QueryDb`] is then built over
//! an `Arc`-wrapped snapshot of the graph reloaded from that file. The
//! strong reference held by the spawned task keeps the published graph
//! data alive for as long as the task needs it, independent of any
//! concurrent workspace `unload`, eviction, or replacement publish. Once
//! the task drops, the `Arc` is released; if the manager has already
//! evicted the workspace the data is freed at that point. There is no
//! path that can leave the spawned task reading freed memory.
53//!
54//! ### Failure isolation
55//!
56//! All work runs under [`spawn_hook`], which wraps the future in
57//! [`tokio::time::timeout`] and logs both error and timeout outcomes
58//! at WARN. The publish/query path never observes the result — the
59//! caller does not await the spawned task.
60//!
61//! The hook runs on the current tokio runtime via `tokio::spawn`; the
62//! publish call site does not await it. The default timeout is taken
63//! from `DaemonConfig::rebuild_drain_timeout_ms` (5 s by default),
64//! clampable by the call site if a tighter ceiling is desired.
65
66use std::{path::Path, sync::Arc, time::Duration};
67
68use sqry_core::graph::CodeGraph;
69use sqry_db::queries::{CalleesQuery, CallersQuery, RelationKey};
70use tracing::warn;
71
/// Target cap on the number of distinct symbol names gathered for the
/// derived-cache warm-up. Collection in `warm_persistent_queries` stops
/// once the set of names reaches this size.
const MAX_DERIVED_WARMUP_SYMBOLS: usize = 64;
73
74/// Signature for a post-publish persistence hook.
75///
76/// Called from [`super::WorkspaceManager::publish_and_retain`]
77/// *after* the admission commit has succeeded, with the published
78/// `Arc<CodeGraph>` and the workspace root path. The hook is
79/// expected to return immediately — any actual IO should be
80/// spawned on a tokio task via [`spawn_hook`] or equivalent —
81/// because `publish_and_retain` is a sync critical section.
82pub trait SqrydHook: Send + Sync + std::fmt::Debug {
83    /// Notify the hook that a fresh graph has been published for
84    /// `workspace_root`. Implementations should NOT block; they
85    /// should spawn a background task and return.
86    fn on_publish(&self, workspace_root: &Path, graph: Arc<CodeGraph>);
87}
88
89impl<T: SqrydHook + ?Sized> SqrydHook for Arc<T> {
90    fn on_publish(&self, workspace_root: &Path, graph: Arc<CodeGraph>) {
91        (**self).on_publish(workspace_root, graph);
92    }
93}
94
/// Do-nothing hook — what unit tests use, and the Phase 6c default when
/// no production hook has been wired. It neither logs nor performs any
/// work, so it adds no runtime overhead.
#[derive(Clone, Copy, Debug, Default)]
pub struct NoOpHook;
100
101impl SqrydHook for NoOpHook {
102    fn on_publish(&self, _workspace_root: &Path, _graph: Arc<CodeGraph>) {
103        // deliberately empty
104    }
105}
106
/// Shared, type-erased handle to the active hook.
///
/// The manager stores an `ArcSwap<Arc<dyn SqrydHook>>` so Task 9 can
/// install the production hook after the daemon boots (once the sqry-db
/// `QueryDb` is built), and so unit tests can install a recording hook at
/// construction time.
pub type SharedHook = Arc<dyn SqrydHook>;
113
114/// Convenience constructor for [`NoOpHook`] as a [`SharedHook`].
115#[must_use]
116pub fn noop_hook() -> SharedHook {
117    Arc::new(NoOpHook)
118}
119
120/// Spawn an async persistence task with the configured timeout.
121///
122/// The task's result is never awaited by the caller. Errors and
123/// timeouts are logged at WARN; the query path is unaffected.
124///
125/// This helper is public so the production
126/// [`super::manager::WorkspaceManager`] and custom `SqrydHook`
127/// impls can share the same timeout-and-absorb pattern.
128pub fn spawn_hook<F, Fut, E>(
129    timeout: Duration,
130    workspace_root: std::path::PathBuf,
131    task_label: &'static str,
132    fut_factory: F,
133) where
134    F: FnOnce() -> Fut + Send + 'static,
135    Fut: std::future::Future<Output = Result<(), E>> + Send + 'static,
136    E: std::fmt::Display + Send + 'static,
137{
138    tokio::spawn(async move {
139        let fut = fut_factory();
140        match tokio::time::timeout(timeout, fut).await {
141            Ok(Ok(())) => {}
142            Ok(Err(err)) => {
143                warn!(
144                    task = task_label,
145                    workspace = %workspace_root.display(),
146                    error = %err,
147                    "sqryd hook {task_label} failed (absorbed; query path continues)",
148                );
149            }
150            Err(_elapsed) => {
151                let timeout_ms = u64::try_from(timeout.as_millis()).unwrap_or(u64::MAX);
152                warn!(
153                    task = task_label,
154                    workspace = %workspace_root.display(),
155                    timeout_ms,
156                    "sqryd hook {task_label} timed out (absorbed; query path continues)",
157                );
158            }
159        }
160    });
161}
162
163// ---------------------------------------------------------------------------
164// QueryDbHook — production post-publish hook.
165//
166// PF03A introduces this type. PF03B (`sqryd` startup) calls
167// `WorkspaceManager::set_hook(QueryDbHook::new(timeout))` so every published
168// graph triggers a `sqry_db::persistence::save_derived` call against the
169// canonical `<workspace_root>/.sqry/graph/derived.sqry` path. Failures are
170// logged at WARN and absorbed; the publish path is never blocked.
171// ---------------------------------------------------------------------------
172
173/// Production [`SqrydHook`] backed by `sqry_db::persistence::save_derived`.
174///
175/// See the module-level docs for the snapshot-SHA-timing and lifetime
176/// decisions (PF03A A and B). The hook is parameterised by:
177///
178/// - `timeout` — wall-clock cap applied via [`spawn_hook`] /
179///   [`tokio::time::timeout`]. Defaults to `DaemonConfig::rebuild_drain_timeout_ms`
180///   (5 s) when constructed via [`Self::new`].
181/// - `query_db_config` — passed straight to [`sqry_db::QueryDb::new`] and
182///   used by [`sqry_db::derived_path`] to compute the target file. The
183///   default value points at `derived.sqry` and uses the standard
184///   per-entry size cap; production sqryd should generally accept the
185///   default.
186#[derive(Debug, Clone)]
187pub struct QueryDbHook {
188    timeout: Duration,
189    query_db_config: sqry_db::QueryDbConfig,
190}
191
192impl QueryDbHook {
193    /// Construct a hook with the supplied timeout and the default
194    /// [`sqry_db::QueryDbConfig`].
195    ///
196    /// PF03B's startup path uses this constructor with
197    /// `DaemonConfig::rebuild_drain_timeout_ms`.
198    #[must_use]
199    pub fn new(timeout: Duration) -> Arc<Self> {
200        Arc::new(Self {
201            timeout,
202            query_db_config: sqry_db::QueryDbConfig::default(),
203        })
204    }
205
206    /// Construct a hook with a caller-provided [`sqry_db::QueryDbConfig`].
207    ///
208    /// Reserved for callers that need to override the default
209    /// `derived_persistence_filename` or per-entry size cap. Production
210    /// sqryd should normally use [`Self::new`].
211    #[must_use]
212    pub fn with_query_db_config(
213        timeout: Duration,
214        query_db_config: sqry_db::QueryDbConfig,
215    ) -> Arc<Self> {
216        Arc::new(Self {
217            timeout,
218            query_db_config,
219        })
220    }
221
222    /// Returns the configured wall-clock timeout. Exposed for tests and
223    /// observability surfaces.
224    #[must_use]
225    pub fn timeout(&self) -> Duration {
226        self.timeout
227    }
228}
229
230impl SqrydHook for QueryDbHook {
231    fn on_publish(&self, workspace_root: &Path, graph: Arc<CodeGraph>) {
232        let timeout = self.timeout;
233        let query_db_config = self.query_db_config.clone();
234        let workspace_root_owned = workspace_root.to_path_buf();
235
236        // Move owned values into the spawned task. spawn_hook wraps the
237        // future in `tokio::time::timeout` and absorbs errors / timeouts.
238        spawn_hook::<_, _, anyhow::Error>(
239            timeout,
240            workspace_root_owned.clone(),
241            "query-db-save-derived",
242            move || {
243                let workspace_root = workspace_root_owned;
244                let graph = graph;
245                let query_db_config = query_db_config;
246                async move { run_save_derived(workspace_root, graph, query_db_config).await }
247            },
248        );
249    }
250}
251
252/// Body of the production hook: persist the published graph to the canonical
253/// snapshot path, reload that exact on-disk snapshot, warm a bounded inventory
254/// of persistent relation queries, and persist the derived cache via
255/// `save_derived`.
256///
257/// All filesystem-touching operations run under [`tokio::task::spawn_blocking`]
258/// so the runtime never blocks on synchronous IO.
259async fn run_save_derived(
260    workspace_root: std::path::PathBuf,
261    graph: Arc<CodeGraph>,
262    query_db_config: sqry_db::QueryDbConfig,
263) -> anyhow::Result<()> {
264    tokio::task::spawn_blocking(move || {
265        run_save_derived_blocking(&workspace_root, &graph, query_db_config)
266    })
267    .await
268    .map_err(|join_err| anyhow::anyhow!("spawn_blocking(query-db-save-derived) join: {join_err}"))?
269}
270
271fn run_save_derived_blocking(
272    workspace_root: &Path,
273    graph: &CodeGraph,
274    query_db_config: sqry_db::QueryDbConfig,
275) -> anyhow::Result<()> {
276    let graph_dir = workspace_root.join(".sqry").join("graph");
277    let snapshot_path = graph_dir.join("snapshot.sqry");
278    std::fs::create_dir_all(&graph_dir)?;
279
280    sqry_core::graph::unified::persistence::save_to_path(graph, &snapshot_path)?;
281    let sha = sqry_db::persistence::compute_file_sha256(&snapshot_path).map_err(|io_err| {
282        anyhow::anyhow!("compute_file_sha256({}): {io_err}", snapshot_path.display())
283    })?;
284
285    let persisted_graph =
286        sqry_core::graph::unified::persistence::load_from_path(&snapshot_path, None)?;
287    let snapshot_arc = Arc::new(persisted_graph.snapshot());
288    let db = sqry_db::QueryDb::new(snapshot_arc, query_db_config);
289    let warmed_entries = warm_persistent_queries(&db);
290    tracing::debug!(
291        workspace = %workspace_root.display(),
292        warmed_entries,
293        "QueryDbHook: warmed persistent query entries before derived-cache save"
294    );
295
296    let derived_path = sqry_db::derived_path(workspace_root, db.config());
297    sqry_db::persistence::save_derived(&db, sha, &derived_path, workspace_root)?;
298
299    Ok(())
300}
301
302fn warm_persistent_queries(db: &sqry_db::QueryDb) -> usize {
303    let mut symbol_names = std::collections::BTreeSet::new();
304    for (_node_id, node) in db.snapshot().iter_nodes() {
305        if node.is_unified_loser() {
306            continue;
307        }
308        if let Some(name) = db.snapshot().strings().resolve(node.name) {
309            symbol_names.insert(name.to_string());
310        }
311        if let Some(qualified_name_id) = node.qualified_name
312            && let Some(qualified_name) = db.snapshot().strings().resolve(qualified_name_id)
313        {
314            symbol_names.insert(qualified_name.to_string());
315        }
316        if symbol_names.len() >= MAX_DERIVED_WARMUP_SYMBOLS {
317            break;
318        }
319    }
320
321    let mut warmed_entries = 0usize;
322    for symbol_name in symbol_names {
323        let key = RelationKey::exact(symbol_name);
324        let _ = db.get::<CallersQuery>(&key);
325        let _ = db.get::<CalleesQuery>(&key);
326        warmed_entries += 2;
327    }
328    warmed_entries
329}
330
331/// Recording hook used by unit tests to observe hook invocations
332/// without exercising the real persistence path.
333#[doc(hidden)]
334#[derive(Debug, Default)]
335pub struct RecordingHook {
336    pub invocations: parking_lot::Mutex<Vec<std::path::PathBuf>>,
337}
338
339impl RecordingHook {
340    #[must_use]
341    pub fn new() -> Arc<Self> {
342        Arc::new(Self::default())
343    }
344
345    #[must_use]
346    pub fn invocation_count(&self) -> usize {
347        self.invocations.lock().len()
348    }
349
350    #[must_use]
351    pub fn invocation_roots(&self) -> Vec<std::path::PathBuf> {
352        self.invocations.lock().clone()
353    }
354}
355
356impl SqrydHook for RecordingHook {
357    fn on_publish(&self, workspace_root: &Path, _graph: Arc<CodeGraph>) {
358        self.invocations.lock().push(workspace_root.to_path_buf());
359    }
360}
361
#[cfg(test)]
mod tests {
    use super::*;

    /// Polls until `path` exists or `deadline` elapses; returns whether the
    /// path exists. Used instead of fixed sleeps so slow CI runners don't
    /// false-negative while fast runs finish early.
    async fn wait_for_path(path: &Path, deadline: Duration) -> bool {
        let give_up_at = std::time::Instant::now() + deadline;
        loop {
            if path.exists() {
                return true;
            }
            if std::time::Instant::now() >= give_up_at {
                return false;
            }
            tokio::time::sleep(Duration::from_millis(20)).await;
        }
    }

    #[test]
    fn noop_hook_compiles_through_shared_dispatch() {
        let hook: SharedHook = noop_hook();
        let graph = Arc::new(CodeGraph::new());
        hook.on_publish(Path::new("/repos/example"), graph);
    }

    #[test]
    fn recording_hook_captures_invocations_in_order() {
        let hook = RecordingHook::new();
        let graph = Arc::new(CodeGraph::new());
        hook.on_publish(Path::new("/repos/a"), Arc::clone(&graph));
        hook.on_publish(Path::new("/repos/b"), Arc::clone(&graph));
        assert_eq!(hook.invocation_count(), 2);
        let roots = hook.invocation_roots();
        assert_eq!(roots[0], Path::new("/repos/a"));
        assert_eq!(roots[1], Path::new("/repos/b"));
    }

    #[tokio::test]
    async fn spawn_hook_absorbs_error() {
        // Hook returns Err; timeout wrapper logs at WARN and
        // absorbs. Success criterion: the spawned task completes
        // without panic.
        spawn_hook::<_, _, &'static str>(
            Duration::from_millis(100),
            std::path::PathBuf::from("/repos/example"),
            "test-hook",
            || async { Err("simulated failure") },
        );
        // Give the spawned task time to run.
        tokio::time::sleep(Duration::from_millis(50)).await;
    }

    #[tokio::test]
    async fn spawn_hook_absorbs_timeout() {
        spawn_hook::<_, _, &'static str>(
            Duration::from_millis(10),
            std::path::PathBuf::from("/repos/example"),
            "test-hook",
            || async {
                tokio::time::sleep(Duration::from_secs(1)).await;
                Ok(())
            },
        );
        // Long enough for the 10 ms timeout to fire inside the task.
        tokio::time::sleep(Duration::from_millis(50)).await;
    }

    // ---------------------------------------------------------------------
    // PF03A regression coverage for the production QueryDbHook.
    // ---------------------------------------------------------------------

    /// Construction smoke test: the production hook builds with the default
    /// config and reports the requested timeout.
    #[test]
    fn pf03a_query_db_hook_constructs_with_requested_timeout() {
        let hook = QueryDbHook::new(Duration::from_millis(1234));
        assert_eq!(hook.timeout(), Duration::from_millis(1234));
    }

    /// `with_query_db_config` accepts a custom [`sqry_db::QueryDbConfig`]
    /// and threads it through unchanged.
    #[test]
    fn pf03a_query_db_hook_accepts_custom_config() {
        let cfg = sqry_db::QueryDbConfig::default();
        let hook = QueryDbHook::with_query_db_config(Duration::from_millis(50), cfg);
        // The hook holds an owned clone; the timeout accessor proves the
        // construction path completed.
        assert_eq!(hook.timeout(), Duration::from_millis(50));
    }

    /// PF09 correction: when the canonical snapshot file is absent, the
    /// hook writes the published graph there first and then binds
    /// derived.sqry to that freshly-written snapshot identity.
    #[tokio::test]
    async fn pf09_query_db_hook_no_snapshot_file_writes_snapshot_and_derived() {
        let workspace = tempfile::tempdir().expect("tempdir");
        let hook = QueryDbHook::new(Duration::from_secs(2));
        let graph = Arc::new(CodeGraph::new());

        // Create the .sqry/graph/ directory but no snapshot.sqry file —
        // mirrors a publish that hasn't yet flushed to disk.
        let graph_dir = workspace.path().join(".sqry").join("graph");
        std::fs::create_dir_all(&graph_dir).unwrap();

        hook.on_publish(workspace.path(), graph);

        let snapshot = graph_dir.join("snapshot.sqry");
        let derived = graph_dir.join("derived.sqry");

        // derived.sqry is the last artefact the hook writes, so wait on it
        // (bounded by the hook's own 2 s timeout) rather than sleeping for a
        // fixed interval.
        let derived_written = wait_for_path(&derived, Duration::from_secs(2)).await;

        assert!(
            snapshot.exists(),
            "PF09: hook must write the published snapshot when it is absent (got {})",
            snapshot.display()
        );
        assert!(
            derived_written,
            "PF09: hook must create derived.sqry after writing snapshot.sqry (got {})",
            derived.display()
        );
    }

    /// PF03A failure isolation: when the canonical snapshot path is
    /// unusable (here: it has been replaced by a directory), the hook
    /// absorbs the error and never panics. The publish path must keep
    /// running.
    #[tokio::test]
    async fn pf03a_query_db_hook_absorbs_save_failure() {
        let workspace = tempfile::tempdir().expect("tempdir");
        let hook = QueryDbHook::new(Duration::from_secs(1));
        let graph = Arc::new(CodeGraph::new());

        // Create a `snapshot.sqry` entry that is actually a directory. The
        // hook is expected to fail at the snapshot write or, failing that,
        // at the hashing step — in both cases before `save_derived` is
        // reached — and to return without panicking.
        let snap_dir = workspace.path().join(".sqry").join("graph");
        std::fs::create_dir_all(&snap_dir).unwrap();
        std::fs::create_dir_all(snap_dir.join("snapshot.sqry")).unwrap();

        hook.on_publish(workspace.path(), graph);
        // Absence cannot be polled for; give the task a fixed window.
        tokio::time::sleep(Duration::from_millis(100)).await;

        // No panic = success. derived.sqry must not exist (the hook never
        // got far enough to write it).
        let derived = snap_dir.join("derived.sqry");
        assert!(
            !derived.exists(),
            "PF03A: a hashing failure must not leave a partially-written derived.sqry"
        );
    }

    /// PF03A/PF09 end-to-end happy path: with a published graph, the hook
    /// writes snapshot.sqry and then derived.sqry to canonical paths before
    /// its timeout fires.
    #[tokio::test]
    async fn pf03a_query_db_hook_writes_derived_sqry_when_snapshot_present() {
        let workspace = tempfile::tempdir().expect("tempdir");
        let snap_dir = workspace.path().join(".sqry").join("graph");
        std::fs::create_dir_all(&snap_dir).unwrap();

        let hook = QueryDbHook::new(Duration::from_secs(5));
        let graph = Arc::new(CodeGraph::new());
        hook.on_publish(workspace.path(), graph);

        // Poll for the derived file with a generous deadline so slow CI
        // runners don't false-negative.
        let derived = snap_dir.join("derived.sqry");
        let derived_written = wait_for_path(&derived, Duration::from_secs(5)).await;
        assert!(
            derived_written,
            "PF03A: hook must write derived.sqry to {} within 5s",
            derived.display()
        );

        // Sanity-check the file is non-empty and starts with the derived
        // magic bytes — proves it actually went through `save_derived`.
        let bytes = std::fs::read(&derived).unwrap();
        assert!(bytes.len() >= sqry_db::DERIVED_MAGIC.len());
        assert_eq!(
            &bytes[..sqry_db::DERIVED_MAGIC.len()],
            sqry_db::DERIVED_MAGIC,
            "derived.sqry must start with SQRY_DERIVED_V02 magic"
        );
    }
}