//! Shared tool-dispatch core used by both the JSON-RPC path
//! (`ipc::methods::tool_dispatch::classify_and_build`) and the
//! MCP host path (`mcp_host::DaemonMcpHandler::call_tool`, U8).
//!
//! Introduced in Phase 8c to close Codex iter-1 B4 (CPU-heavy tool
//! work on tokio workers) and iter-1 M2 (avoid two parallel 14-arm
//! dispatchers that could drift). The classify/execute/stale-warning
//! logic lives here; the two transports wrap its `ExecuteVerdict`
//! return into their respective envelope formats.
//!
//! # Concurrency model
//!
//! `classify_and_execute` runs the user-supplied `run` closure inside
//! [`tokio::task::spawn_blocking`] so CPU-heavy graph traversal never
//! ties up a tokio worker. It then wraps the resulting
//! [`tokio::task::JoinHandle`] in [`tokio::time::timeout`] with the
//! caller-supplied per-tool deadline. When the outer timeout fires the
//! `JoinHandle` is dropped — the OS thread continues executing the
//! closure until the closure itself returns, but its result is
//! discarded and [`DaemonError::ToolTimeout`] is returned on the wire.
//! This bounds RESPONSE LATENCY, not the lifetime of a runaway tool
//! closure (see `DaemonConfig::tool_timeout_secs` docs).
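//!
//! A minimal standalone sketch of that drop-on-deadline behaviour
//! (illustrative only; the real wrapper is `execute_with_timeout`
//! below, which also threads a cancellation token and a test hook):
//!
//! ```ignore
//! use std::time::Duration;
//!
//! async fn demo() {
//!     let handle = tokio::task::spawn_blocking(|| {
//!         std::thread::sleep(Duration::from_secs(5)); // stand-in for graph work
//!         42
//!     });
//!     match tokio::time::timeout(Duration::from_millis(50), handle).await {
//!         Ok(joined) => println!("finished in time: {joined:?}"),
//!         Err(_elapsed) => {
//!             // The JoinHandle is dropped here; the OS thread keeps
//!             // running, but its eventual result is discarded.
//!             println!("deadline fired; result discarded");
//!         }
//!     }
//! }
//! ```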

use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, SystemTime};

/// Test instrumentation for `execute_with_timeout`.
///
/// Provides a workspace-path-keyed notification mechanism so integration
/// tests can prove the real daemon `spawn_blocking(dispatch_by_name)` OS
/// thread actually started before they fire `server.shutdown.cancel()`.
///
/// # Design (iter-8 redesign)
///
/// Earlier iterations (iter-3 through iter-7) used a single global notifier
/// slot keyed by a monotonically increasing `u64` token. Codex iter-3 through
/// iter-6 reviews surfaced a sequence of races (cross-test wipe, snapshot
/// vs register reordering, etc.) that we tried to patch with token-aware
/// `clear`/`notify` and a `tokio::sync::Mutex` serializer (iter-7). None of
/// those patches address the root cause: **the daemon's `notify(token)` call
/// runs for EVERY tool dispatch — including tool calls from OTHER concurrent
/// tests in the same binary**. With a single-slot global notifier, any other
/// test calling `call_tool` while our test holds a registration would fire
/// our flag spuriously (the daemon's `notify` reads our token from the slot
/// and matches it, even though the dispatch belongs to a different test's
/// call).
///
/// The iter-8 fix binds ownership at `register()` time using the **canonical
/// workspace path** as the key:
///
/// 1. The test creates a unique tempdir → unique canonicalised path.
/// 2. The test calls `register(workspace_path, flag)` to add an entry to
///    a `Vec<(PathBuf, Arc<AtomicBool>)>` registry.
/// 3. The daemon's `execute_with_timeout` calls `notify(&canonical_root)`
///    inside the `spawn_blocking` closure with the dispatched workspace's
///    path. `notify` fires the flag of the entry whose path equals
///    `canonical_root` — no other test's registration is touched.
/// 4. The test calls `clear(workspace_path)` at teardown to remove its own
///    entry by path.
///
/// Because every test creates its own tempdir, paths are guaranteed unique.
/// Concurrent tests cannot collide because the registry is a `Vec` (multiple
/// simultaneous registrations are allowed) and the lookup key (path) is
/// per-test-private.
///
/// # Why this is simpler than the iter-7 design
///
/// - No tokens, no `SEQ` counter, no `snapshot_token` step.
/// - No `HOOK_SERIALIZER` — multiple tests can register concurrently.
/// - The registry is a plain `std::sync::Mutex<Vec<...>>` with negligible
///   contention (each tool dispatch takes the lock for one linear scan).
/// - Cannot suffer the "test B's call fires test A's flag" cross-test
///   contamination that iter-3 through iter-7 all retained, because the
///   `notify` key (path) is the dispatched workspace's root, not a global
///   slot value.
///
/// This module is intentionally always compiled (not gated by `#[cfg(test)]`)
/// because the library crate is compiled once — without `cfg(test)` — even
/// when running integration tests, so a `#[cfg(test)]` guard on library code
/// is not visible to integration test binaries. The runtime cost in
/// production is one `Mutex<Vec<_>>::lock()` per tool dispatch over an
/// always-empty vector — a single uncontended atomic CAS.
///
/// # Usage (integration tests)
///
/// ```ignore
/// use std::sync::{Arc, atomic::{AtomicBool, Ordering}};
/// use sqry_daemon::ipc::tool_core::thread_start_hook;
///
/// let canon = canonicalize_path(&dir.path()).unwrap();
/// let started = Arc::new(AtomicBool::new(false));
/// thread_start_hook::register(canon.clone(), Arc::clone(&started));
/// // ... submit call_tool against `canon`, poll started, fire shutdown ...
/// thread_start_hook::clear(&canon);
/// ```
#[doc(hidden)]
pub mod thread_start_hook {
    use std::path::{Path, PathBuf};
    use std::sync::Arc;
    use std::sync::Mutex;
    use std::sync::atomic::{AtomicBool, Ordering};

    /// Path-keyed registry of test notifiers. Each entry is a
    /// `(canonical workspace path, notifier flag)` pair. The daemon's
    /// `notify(path)` fires the flag of the entry whose path equals
    /// `path` — none of the others is touched. Multiple concurrent
    /// registrations are allowed; tests are isolated by their unique
    /// tempdir paths.
    static REGISTRY: Mutex<Vec<(PathBuf, Arc<AtomicBool>)>> = Mutex::new(Vec::new());

    /// Register a per-test notifier keyed by the canonical workspace
    /// path that the test will dispatch tool calls against.
    ///
    /// The path MUST be canonicalised (via
    /// `sqry_core::project::canonicalize_path`) so it matches the
    /// `canonical_root` value the daemon passes to [`notify`] inside
    /// `execute_with_timeout`. Tests typically use `tempfile::tempdir()`
    /// + `canonicalize_path(dir.path())` which guarantees uniqueness.
    ///
    /// If a previous registration for the same path is still present
    /// (e.g. a test forgot to call [`clear`]), it is replaced. In the
    /// common case the replace branch is never taken, because tempdir
    /// paths never repeat within a single test-binary process lifetime.
    pub fn register(workspace_path: PathBuf, flag: Arc<AtomicBool>) {
        let mut guard = REGISTRY
            .lock()
            .expect("thread_start_hook REGISTRY poisoned");
        // Replace any stale entry for this path; otherwise push a new one.
        if let Some(slot) = guard.iter_mut().find(|(p, _)| *p == workspace_path) {
            slot.1 = flag;
        } else {
            guard.push((workspace_path, flag));
        }
    }

    /// Remove the registration for `workspace_path`, if present.
    /// No-op when the path was never registered or has already been
    /// cleared. Note that [`notify`] does NOT remove entries: a fired
    /// flag's entry stays in the registry, so teardown is always the
    /// test's responsibility.
    pub fn clear(workspace_path: &Path) {
        let mut guard = REGISTRY
            .lock()
            .expect("thread_start_hook REGISTRY poisoned");
        guard.retain(|(p, _)| p != workspace_path);
    }

    /// Called from inside the `spawn_blocking` closure as its first
    /// action. Fires the registered flag for `workspace_path` if one
    /// exists; otherwise a no-op.
    ///
    /// Cross-test isolation is structural: `workspace_path` is the
    /// dispatched workspace's canonical root, so concurrent tests with
    /// different tempdir paths cannot fire each other's flags.
    pub(super) fn notify(workspace_path: &Path) {
        let guard = REGISTRY
            .lock()
            .expect("thread_start_hook REGISTRY poisoned");
        if let Some((_, flag)) = guard.iter().find(|(p, _)| p == workspace_path) {
            flag.store(true, Ordering::Release);
        }
    }
}

use serde_json::Value;
use sqry_core::graph::acquisition::{
    AcquisitionOperation, GraphAcquirer, GraphAcquisition, GraphAcquisitionRequest, GraphFreshness,
    MissingGraphPolicy, PathPolicy, PluginSelectionPolicy, StalePolicy,
};
use sqry_core::project::{ProjectRootMode, absolutize_without_resolution, canonicalize_path};
use sqry_core::query::executor::QueryExecutor;
use sqry_mcp::daemon_adapter::WorkspaceContext;

use crate::error::DaemonError;
use crate::workspace::{
    ServeVerdict, WorkspaceBuilder, WorkspaceKey, WorkspaceManager, acquirer::DaemonGraphProvider,
};

/// Outcome of [`classify_and_execute`]. Callers wrap this in their
/// transport-specific envelope (JSON-RPC `ResponseEnvelope` or MCP
/// `CallToolResult`) and, for stale verdicts, splice the
/// `stale_warning` string into the inner payload.
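///
/// A splice sketch for the transport layer (field name `_stale_warning`
/// per the SGA notes referenced below; the surrounding envelope shape
/// varies by transport):
///
/// ```ignore
/// let payload = match verdict {
///     ExecuteVerdict::Fresh { inner, .. } => inner,
///     ExecuteVerdict::Stale { mut inner, stale_warning, .. } => {
///         if let serde_json::Value::Object(map) = &mut inner {
///             map.insert("_stale_warning".into(), stale_warning.into());
///         }
///         inner
///     }
/// };
/// ```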
#[derive(Debug)]
pub(crate) enum ExecuteVerdict {
    /// Tool ran against a Fresh workspace (Loaded or Rebuilding state).
    Fresh {
        inner: Value,
        state: crate::workspace::WorkspaceState,
    },
    /// Tool ran against a Stale workspace. Callers MUST splice
    /// `stale_warning` into the response payload.
    Stale {
        inner: Value,
        stale_warning: String,
        last_good_at: SystemTime,
        last_error: Option<String>,
    },
}

/// Canonicalise a user-supplied `index_root` path, returning
/// [`DaemonError::InvalidArgument`] on any failure. This is the
/// transport-neutral twin of [`crate::ipc::path_policy::resolve_index_root`]
/// — the JSON-RPC path still goes through `path_policy` because its
/// return type is [`crate::ipc::methods::MethodError::InvalidParams`],
/// but the shared `tool_core` pipeline needs a typed
/// [`DaemonError::InvalidArgument`] so the MCP host (U8) can map the
/// same precondition failure into a `-32602`/`validation_error` MCP
/// envelope without going through `MethodError`.
///
/// SGA04 building block — public-to-the-crate alias for [`resolve_path`]
/// so the daemon graph provider (`workspace::acquirer`) can perform
/// path-policy validation through the SAME canonicaliser the existing
/// dispatch path uses, without duplicating the absolutise / metadata /
/// canonicalize ladder.
///
/// Path canonicalisation must run before any workspace classification
/// — see SGA02's `InvalidPath` precedence contract. Returning a
/// [`DaemonError::InvalidArgument`] keeps the error taxonomy local;
/// the acquirer maps it into [`sqry_core::graph::acquisition::GraphAcquisitionError::InvalidPath`].
///
/// SGA05 will route read-only tool dispatch through the provider; the
/// existing [`classify_and_execute`] entrypoint stays unchanged in this
/// DAG unit.
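///
/// A minimal usage sketch (hypothetical path):
///
/// ```ignore
/// use std::path::Path;
///
/// // A directory that exists canonicalises; anything else maps to
/// // `DaemonError::InvalidArgument` with a `path_policy:` reason.
/// let err = resolve_path_for_acquisition(Path::new("/does/not/exist"))
///     .expect_err("missing directory is rejected");
/// assert!(matches!(err, DaemonError::InvalidArgument { .. }));
/// ```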
pub(crate) fn resolve_path_for_acquisition(raw: &Path) -> Result<PathBuf, DaemonError> {
    resolve_path(raw)
}

/// SGA04 building block — construct a daemon-side
/// [`DaemonGraphProvider`] for the supplied manager + builder pair.
///
/// SGA05 routes the JSON-RPC `tool_dispatch::classify_and_build`
/// closure path and the daemon MCP host's `call_tool` graph-backed
/// arms through [`acquire_and_execute`], which builds a provider
/// per-request via this helper.
pub(crate) fn daemon_graph_provider(
    manager: Arc<WorkspaceManager>,
    builder: Arc<dyn WorkspaceBuilder>,
) -> DaemonGraphProvider {
    DaemonGraphProvider::new(manager, builder)
}

fn resolve_path(raw: &Path) -> Result<PathBuf, DaemonError> {
    let absolutised =
        absolutize_without_resolution(raw).map_err(|e| DaemonError::InvalidArgument {
            reason: format!("path_policy: index_root absolutise: {e}"),
        })?;
    match std::fs::metadata(&absolutised) {
        Ok(meta) if meta.is_dir() => {
            canonicalize_path(&absolutised).map_err(|e| DaemonError::InvalidArgument {
                reason: format!("path_policy: index_root canonicalize: {e}"),
            })
        }
        Ok(_) => Err(DaemonError::InvalidArgument {
            reason: "path_policy: index_root exists but is not a directory".to_string(),
        }),
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Err(DaemonError::InvalidArgument {
            reason: "path_policy: index_root does not exist; daemon/load requires \
                         an existing directory so a canonical WorkspaceKey can be computed"
                .to_string(),
        }),
        Err(e) => Err(DaemonError::InvalidArgument {
            reason: format!("path_policy: index_root stat: {e}"),
        }),
    }
}

/// SGA05 — shared acquire + execute + stale-warning pipeline backed
/// by the [`DaemonGraphProvider`].
///
/// Every daemon-hosted read-only tool (the 14 graph-backed tools in
/// [`sqry_mcp::tools_schema::DAEMON_SUPPORTED_TOOL_NAMES`], i.e. the
/// list minus the mutating `rebuild_index` and the translation
/// wrapper `sqry_ask`)
/// routes through this entrypoint, which:
///
/// 1. Builds a per-request [`DaemonGraphProvider`] over the supplied
///    `manager` + `builder` pair (`tool_name` is forwarded into the
///    acquisition metadata for diagnostics).
/// 2. Calls
///    [`GraphAcquirer::acquire`](sqry_core::graph::acquisition::GraphAcquirer::acquire)
///    with [`AcquisitionOperation::ReadOnlyQuery`]. The provider
///    canonicalises the path, classifies the workspace, and — on
///    eviction — performs the bounded one-shot read-only persisted
///    rehydrate (per SGA02 §Tool Ownership Boundary and SGA04 contract
///    guarantees).
/// 3. Maps the resulting [`GraphAcquisition`] / [`GraphFreshness`]
///    into the existing [`ExecuteVerdict`]:
///    - `Fresh` and `Reloaded` both surface as
///      [`ExecuteVerdict::Fresh`] so the wire envelope stays
///      byte-compatible (per SGA design §Staleness and Wire
///      Compatibility — reload metadata is internal-only).
///    - `Stale` surfaces as [`ExecuteVerdict::Stale`] with the
///      existing `_stale_warning` rendering preserved.
/// 4. Maps any [`GraphAcquisitionError`] through the
///    [`From<GraphAcquisitionError> for DaemonError`] impl so
///    `WorkspaceEvicted`, `WorkspaceIncompatibleGraph`, `Stale`
///    expiry, and `InvalidArgument` precedence all preserve their
///    existing JSON-RPC / MCP envelope shapes.
/// 5. Runs the user-supplied closure inside
///    [`tokio::task::spawn_blocking`] with the same
///    [`tokio::time::timeout`] outer bound used by
///    [`classify_and_execute`] — CPU-heavy graph traversal does not
///    tie up tokio workers.
///
/// `rebuild_index` MUST NOT call this helper; the mutating rebuild
/// path drives [`WorkspaceManager::get_or_load`] directly so the
/// durable rebuild contract owns those semantics. See
/// `sqry-daemon/src/mcp_host/mod.rs::handle_rebuild_index` and the
/// `MutatingRebuild` short-circuit inside the
/// [`DaemonGraphProvider::acquire`] implementation.
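///
/// A call-shape sketch (argument values are illustrative):
///
/// ```ignore
/// let verdict = acquire_and_execute(
///     Arc::clone(&manager),
///     Arc::clone(&builder),
///     Arc::clone(&executor),
///     Duration::from_secs(30),
///     "/path/to/workspace",
///     Some("sqry_query"),
///     |_wctx, _cancel| Ok(serde_json::json!({"ok": true})),
/// )
/// .await?;
/// ```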
pub(crate) async fn acquire_and_execute<F>(
    manager: Arc<WorkspaceManager>,
    builder: Arc<dyn WorkspaceBuilder>,
    tool_executor: Arc<QueryExecutor>,
    tool_timeout: Duration,
    path: &str,
    tool_name: Option<&'static str>,
    run: F,
) -> Result<ExecuteVerdict, DaemonError>
where
    F: FnOnce(
            &WorkspaceContext,
            &sqry_core::query::cancellation::CancellationToken,
        ) -> anyhow::Result<Value>
        + Send
        + 'static,
{
    // Build a per-request provider (cheap — three Arc clones plus an
    // Option tag) and acquire the graph through the shared boundary.
    let mut provider = DaemonGraphProvider::new(manager, builder);
    if let Some(name) = tool_name {
        provider = provider.with_tool_name(name);
    }
    let request = GraphAcquisitionRequest {
        requested_path: PathBuf::from(path),
        operation: AcquisitionOperation::ReadOnlyQuery,
        // Daemon read-only paths use the in-tree default policies. The
        // provider already canonicalises through the daemon's
        // path-policy ladder (`tool_core::resolve_path`); the
        // `PathPolicy` field on the request is held for symmetry with
        // the filesystem provider.
        path_policy: PathPolicy::default(),
        // The daemon never auto-builds on miss for read-only queries —
        // the daemon's own admission and dispatch flow owns initial
        // graph load via `daemon/load`. `Error` here matches the
        // pre-SGA05 semantics: a workspace that has never been loaded
        // returns a `NotReady` / `WorkspaceBuildFailed` envelope.
        missing_graph_policy: MissingGraphPolicy::Error,
        stale_policy: StalePolicy::default(),
        plugin_selection_policy: PluginSelectionPolicy::default(),
        tool_name,
    };
    let acquisition: GraphAcquisition = provider.acquire(request).map_err(DaemonError::from)?;

    let canonical_root = acquisition.workspace_root.clone();
    let graph = Arc::clone(&acquisition.graph);
    let freshness = acquisition.freshness;

    let wctx = WorkspaceContext {
        workspace_root: canonical_root.clone(),
        graph,
        executor: tool_executor,
    };
    let inner = execute_with_timeout(tool_timeout, &canonical_root, wctx, run).await?;

    match freshness {
        // Fresh and Reloaded both produce the existing fresh response
        // envelope — reload is an internal recovery, not a wire-shape
        // change. The lifecycle label flows from the Fresh path; for
        // Reloaded acquisitions we map to the canonical Loaded state
        // because the bounded reload restores a Loaded workspace.
        GraphFreshness::Fresh { lifecycle_label } => {
            // Decode the daemon provider's lifecycle label back into
            // the wire-visible `WorkspaceState` so the
            // `ResponseMeta::fresh_from(state, ...)` envelope
            // accurately reports `Loaded` vs. `Rebuilding` (the only
            // two states the underlying `classify_for_serve` Fresh
            // arm can produce).
            let state = match lifecycle_label {
                Some("rebuilding") => crate::workspace::WorkspaceState::Rebuilding,
                _ => crate::workspace::WorkspaceState::Loaded,
            };
            Ok(ExecuteVerdict::Fresh { inner, state })
        }
        GraphFreshness::Reloaded { .. } => Ok(ExecuteVerdict::Fresh {
            inner,
            state: crate::workspace::WorkspaceState::Loaded,
        }),
        GraphFreshness::Stale {
            last_good_at,
            last_error,
            age_hours,
        } => {
            // Reconstruct the `last_good_at: SystemTime` and
            // `age_hours: u64` shape the wire envelope expects.
            // `GraphFreshness::Stale` carries the RFC3339 string for
            // transport neutrality; round-trip through chrono so the
            // existing `render_stale_warning` rendering produces the
            // same RFC3339 bytes.
            let parsed_last_good = last_good_at
                .as_deref()
                .and_then(|s| chrono::DateTime::parse_from_rfc3339(s).ok())
                .map(|dt| SystemTime::from(dt.with_timezone(&chrono::Utc)));
            let lg_at = parsed_last_good.unwrap_or_else(SystemTime::now);
            let age_u64 = age_hours.map(|h| h as u64).unwrap_or(0);
            let stale_warning =
                render_stale_warning(&canonical_root, age_u64, lg_at, last_error.as_deref());
            Ok(ExecuteVerdict::Stale {
                inner,
                stale_warning,
                last_good_at: lg_at,
                last_error,
            })
        }
    }
}

/// SGA05 legacy — kept exclusively for the in-crate unit tests in
/// the `tests` module below, which assert direct
/// `classify_for_serve` semantics (NotReady, Loading, ToolTimeout,
/// Internal-from-closure-error) that are easier to express against
/// `WorkspaceManager` directly than against
/// [`acquire_and_execute`] (which also runs path canonicalisation +
/// reload accounting). Production read-only tool dispatch now
/// routes through [`acquire_and_execute`].
///
/// Pipeline:
/// 1. Canonicalises `path` via [`resolve_path`] into a
///    [`DaemonError::InvalidArgument`] on failure.
/// 2. Classifies the workspace via
///    [`WorkspaceManager::classify_for_serve`].
/// 3. On Fresh/Stale: builds a [`WorkspaceContext`] and runs `run`
///    inside [`tokio::task::spawn_blocking`] with a
///    [`tokio::time::timeout(tool_timeout, ...)`] outer bound.
/// 4. On NotReady: returns [`DaemonError::WorkspaceBuildFailed`].
/// 5. On outer timeout: drops the [`tokio::task::JoinHandle`] and
///    returns [`DaemonError::ToolTimeout`] (OS thread continues;
///    result discarded).
///
/// # Errors
///
/// - [`DaemonError::InvalidArgument`] — path canonicalisation failed.
/// - [`DaemonError::WorkspaceBuildFailed`] — NotReady verdict.
/// - [`DaemonError::WorkspaceStaleExpired`] — Stale expired past cap.
/// - [`DaemonError::WorkspaceEvicted`] — workspace evicted between
///   classify and graph capture.
/// - [`DaemonError::ToolTimeout`] — outer timeout fired.
/// - [`DaemonError::Internal`] — `run` returned `anyhow::Error` or
///   [`tokio::task::spawn_blocking`] join failed.
///
/// # Design
///
/// Per Codex iter-3 NIT-1, `daemon_version` is NOT a parameter here
/// — callers pass it to their respective envelope builders (Phase 8b
/// [`crate::ipc::protocol::ResponseMeta::fresh_from`] /
/// [`crate::ipc::protocol::ResponseMeta::stale_from`], MCP `rmcp`
/// envelope).
#[allow(dead_code)] // Used by in-crate `mod tests`.
pub(crate) async fn classify_and_execute<F>(
    manager: Arc<WorkspaceManager>,
    tool_executor: Arc<QueryExecutor>,
    tool_timeout: Duration,
    path: &str,
    run: F,
) -> Result<ExecuteVerdict, DaemonError>
where
    F: FnOnce(
            &WorkspaceContext,
            &sqry_core::query::cancellation::CancellationToken,
        ) -> anyhow::Result<Value>
        + Send
        + 'static,
{
    // Step 1: canonicalise path.
    let canonical_root = resolve_path(Path::new(path))?;
    let key = WorkspaceKey::new(canonical_root.clone(), ProjectRootMode::GitRoot, 0);

    // Step 2: classify.
    let verdict = manager.classify_for_serve(&key, SystemTime::now())?;

    match verdict {
        ServeVerdict::Fresh { graph, state } => {
            let wctx = WorkspaceContext {
                workspace_root: canonical_root.clone(),
                graph,
                executor: tool_executor,
            };
            let inner = execute_with_timeout(tool_timeout, &canonical_root, wctx, run).await?;
            Ok(ExecuteVerdict::Fresh { inner, state })
        }
        ServeVerdict::Stale {
            graph,
            age_hours,
            last_good_at,
            last_error,
        } => {
            let wctx = WorkspaceContext {
                workspace_root: canonical_root.clone(),
                graph,
                executor: tool_executor,
            };
            let inner = execute_with_timeout(tool_timeout, &canonical_root, wctx, run).await?;
            let stale_warning = render_stale_warning(
                &canonical_root,
                age_hours,
                last_good_at,
                last_error.as_deref(),
            );
            Ok(ExecuteVerdict::Stale {
                inner,
                stale_warning,
                last_good_at,
                last_error,
            })
        }
        ServeVerdict::NotReady { state } => Err(DaemonError::WorkspaceBuildFailed {
            root: canonical_root,
            reason: format!("workspace not ready ({state:?}); call daemon/load first"),
        }),
    }
}

/// Run `run` inside `spawn_blocking` with an outer timeout.
///
/// Extracted helper so both the Fresh and Stale arms of
/// [`classify_and_execute`] share identical timeout semantics. On
/// timeout the detached [`tokio::task::JoinHandle`] is dropped (the OS
/// thread continues but the result is discarded); on join failure the
/// error is wrapped in [`DaemonError::Internal`].
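///
/// A cooperative-cancellation sketch for a `run` closure. The poll
/// method name on the token is assumed for illustration; substitute
/// whatever check the real `CancellationToken` exposes:
///
/// ```ignore
/// let run = |_wctx: &WorkspaceContext,
///            cancel: &sqry_core::query::cancellation::CancellationToken|
///  -> anyhow::Result<serde_json::Value> {
///     for batch in work_batches {
///         if cancel.is_cancelled() { // hypothetical poll method
///             // Surfacing `QueryError::Cancelled` makes the wrapper
///             // emit the canonical `ToolTimeout` envelope below.
///             return Err(sqry_core::query::QueryError::Cancelled.into());
///         }
///         process(batch);
///     }
///     Ok(serde_json::Value::Null)
/// };
/// ```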
async fn execute_with_timeout<F>(
    tool_timeout: Duration,
    canonical_root: &Path,
    wctx: WorkspaceContext,
    run: F,
) -> Result<Value, DaemonError>
where
    // `A_cancellation.md` §2 + `00_contracts.md` §3.CC-1: the daemon
    // closure receives both the `WorkspaceContext` and a borrowed
    // per-request `CancellationToken`. The wrapper retains ownership
    // of the canonical clone and signals on deadline so the in-flight
    // `spawn_blocking` thread observes the cancellation cooperatively
    // (per GT-6, a running blocking task cannot be aborted).
    F: FnOnce(
            &WorkspaceContext,
            &sqry_core::query::cancellation::CancellationToken,
        ) -> anyhow::Result<Value>
        + Send
        + 'static,
{
    // Derive the canonical wire fields BEFORE spawning so the borrow
    // of `canonical_root` does not cross the `.await`.
    let deadline_ms = u64::try_from(tool_timeout.as_millis()).unwrap_or(u64::MAX);
    let secs = tool_timeout.as_secs();
    let root_owned = canonical_root.to_path_buf();

    // Capture the canonical workspace path so the spawn_blocking closure
    // can call `thread_start_hook::notify(&path)` as its first action.
    // This is the path-keyed test instrumentation hook (iter-8 redesign):
    // a registered test notifier is fired only when its workspace path
    // matches `canonical_root`. Tests use unique tempdirs, so concurrent
    // tests cannot fire each other's flags. The hook is a no-op for the
    // common case of no test registration (one uncontended Mutex<Vec>
    // lock + linear scan over an always-empty vector).
    let hook_path = canonical_root.to_path_buf();

    // Per-request cancellation token. Wrapper owns the canonical clone;
    // closure owns a Send/Clone copy moved into spawn_blocking. Both
    // observe the same `Arc<AtomicBool>` flag.
    let cancel = sqry_core::query::cancellation::CancellationToken::new();
    let cancel_for_closure = cancel.clone();

    let join_handle = tokio::task::spawn_blocking(move || {
        // Signal that the real OS thread has started by firing the
        // registered notifier (if any) for the dispatched workspace path.
        // This is the FIRST action inside the closure so the test's
        // server-side barrier resolves before any graph work begins,
        // proving the real daemon dispatch path reached `spawn_blocking`
        // and the OS scheduler dispatched it.
        thread_start_hook::notify(&hook_path);
        run(&wctx, &cancel_for_closure)
    });
    let result = tokio::time::timeout(tool_timeout, join_handle).await;

    // Deadline elapsed → flip the token *before* falling through so
    // the detached blocking thread observes cancellation on its next
    // `evaluate_all` per-batch poll. We must NOT await the JoinHandle
    // here — the contract on the deadline arm is fire-and-forget; the
    // cooperative-cancellation token is what frees the blocking-pool
    // slot once the closure body returns. (Mirrors the standalone
    // `sqry-mcp::SqryServer::execute_tool_with_timeout` deadline path.)
    if result.is_err() {
        cancel.cancel();
    }

    match result {
        Ok(Ok(Ok(value))) => Ok(value),
        Ok(Ok(Err(err))) => {
            // `A_cancellation.md` §4: when the closure returned because
            // it observed the cancellation we just signalled, surface
            // the canonical `ToolTimeout` envelope so the wire shape is
            // identical to the wrapper-only timeout arm. `kind =
            // "deadline_exceeded"` is preserved across both paths so
            // MCP clients use a single discriminator regardless of
            // which side observed first.
            if let Some(sqry_core::query::QueryError::Cancelled) =
                err.downcast_ref::<sqry_core::query::QueryError>()
            {
                Err(DaemonError::ToolTimeout {
                    root: root_owned,
                    secs,
                    deadline_ms,
                })
            } else if let Some(gate_err) =
                err.downcast_ref::<sqry_core::query::cost_gate::CostGateError>()
            {
                // `B_cost_gate.md` §3 + `00_contracts.md` §3.CC-2:
                // pre-flight cost-gate rejection on the daemon-hosted
                // MCP path. `DaemonError::QueryTooBroad` carries the
                // CC-2 7-key `details` payload through to
                // `daemon_err_to_mcp` which emits the canonical 4-key
                // envelope (byte-identical to the standalone
                // `RpcError::query_too_broad` shape).
                Err(DaemonError::QueryTooBroad {
                    reason: gate_err.to_string(),
                    details: gate_err.to_query_too_broad_details(),
                })
            } else if let Some(gate_err) =
                err.downcast_ref::<sqry_db::planner::cost_gate::PlannerCostGateError>()
            {
                // Planner-side cost gate (`sqry_query`, `plan-query`).
                // Distinct error type, identical wire envelope.
                Err(DaemonError::QueryTooBroad {
                    reason: gate_err.to_string(),
                    details: gate_err.to_query_too_broad_details(),
                })
            } else if let Some(rpc_err) = err.downcast_ref::<sqry_mcp::error::RpcError>() {
                // Cluster-C iter-3: a typed `RpcError` propagated up
                // from the daemon adapter's argument-parsing layer
                // (`sqry-mcp/src/daemon_adapter/dispatch.rs`). Without
                // this arm, validation errors like `budget_rows: 0`
                // fall through to `DaemonError::Internal` and surface
                // as `McpError::internal_error` (-32603) on the wire,
                // diverging from the standalone path which emits
                // `McpError::invalid_params` (-32602). The
                // `RpcErrorPreserved` variant carries the typed
                // RpcError through to `daemon_err_to_mcp` so the
                // wire envelope is byte-identical to standalone.
                Err(DaemonError::RpcErrorPreserved(rpc_err.clone()))
            } else if let Some(budget_err) =
                err.downcast_ref::<sqry_core::query::budget::BudgetExceeded>()
            {
                // `C_budget.md` §3 + `00_contracts.md` §3.CC-2:
                // runtime row-budget exceedance surfaces with
                // `details.source = "runtime_budget"`. Cluster-C
                // iter-2: include the sanitised `predicate_shape` so
                // the daemon-hosted envelope is wire-comparable to
                // the standalone path and to the cluster-B static
                // estimate envelope.
                let details = serde_json::json!({
                    "source": "runtime_budget",
                    "kind": sqry_core::query::cost_gate::KIND_QUERY_TOO_BROAD,
                    "examined": budget_err.examined,
                    "limit": budget_err.limit,
                    "predicate_shape": budget_err.predicate_shape.clone(),
                    "suggested_predicates":
                        sqry_core::query::cost_gate::SCOPE_FILTER_FIELDS,
                    "doc_url":
                        sqry_core::query::cost_gate::QUERY_TOO_BROAD_DOC_URL,
                });
                Err(DaemonError::QueryTooBroad {
                    reason: budget_err.to_string(),
                    details,
                })
            } else {
                Err(DaemonError::Internal(err))
            }
        }
        Ok(Err(join_err)) => Err(DaemonError::Internal(anyhow::anyhow!(
            "spawn_blocking join: {join_err}"
        ))),
        Err(_elapsed) => Err(DaemonError::ToolTimeout {
            root: root_owned,
            secs,
            deadline_ms,
        }),
    }
}

/// Render the `_stale_warning` string spliced into a Stale verdict
/// response by the calling transport layer. Moved to this module so
/// both the JSON-RPC path and the MCP host share the same format.
///
/// With `last_error`:
/// ```text
/// workspace {root} served from last-good build at {rfc3339} ({age_hours}h stale); last error: {reason}
/// ```
/// Without `last_error`:
/// ```text
/// workspace {root} served from last-good build at {rfc3339} ({age_hours}h stale)
/// ```
///
/// The `; last error:` clause is omitted entirely when no diagnostic
/// is available (Phase 8b iter-1 n2 fix: don't emit a trailing
/// `: None`-style marker, keep the wire message self-describing).
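///
/// Example output (values mirror the unit tests below):
///
/// ```text
/// workspace /tmp/ws served from last-good build at 2025-10-09T08:53:20Z (48h stale); last error: parse error
/// ```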
pub(crate) fn render_stale_warning(
    root: &Path,
    age_hours: u64,
    last_good_at: SystemTime,
    last_error: Option<&str>,
) -> String {
    use chrono::{DateTime, SecondsFormat, Utc};
    let rfc3339 = DateTime::<Utc>::from(last_good_at).to_rfc3339_opts(SecondsFormat::Secs, true);
    match last_error {
        Some(reason) => format!(
            "workspace {} served from last-good build at {rfc3339} ({age_hours}h stale); last error: {reason}",
            root.display()
        ),
        None => format!(
            "workspace {} served from last-good build at {rfc3339} ({age_hours}h stale)",
            root.display()
        ),
    }
}

#[cfg(test)]
mod tests {
    use std::sync::Arc;
    use std::time::{Duration, SystemTime, UNIX_EPOCH};

    use serde_json::Value;

    use super::{classify_and_execute, render_stale_warning};
    use crate::config::DaemonConfig;
    use crate::error::DaemonError;
    use crate::workspace::WorkspaceManager;
    use sqry_core::query::executor::QueryExecutor;

    // ----- render_stale_warning (pure) ---------------------------------

    #[test]
    fn render_stale_warning_with_last_error() {
        let root = std::path::Path::new("/tmp/ws");
        // 2025-10-09T08:53:20Z — arbitrary past instant.
        let last_good = UNIX_EPOCH + Duration::from_secs(1_760_000_000);
        let got = render_stale_warning(root, 48, last_good, Some("parse error"));
        assert!(got.contains("/tmp/ws"));
        assert!(got.contains("48h stale"));
        assert!(got.contains("; last error: parse error"));
        // RFC3339 UTC-Zulu sentinel — `to_rfc3339_opts(Secs, true)` always
        // emits a trailing `Z` rather than `+00:00`.
        assert!(got.contains('Z'), "expected RFC3339 UTC-Zulu form: {got}");
    }

    #[test]
    fn render_stale_warning_without_last_error_omits_clause() {
        let root = std::path::Path::new("/tmp/ws");
        let last_good = UNIX_EPOCH + Duration::from_secs(1_760_000_000);
        let got = render_stale_warning(root, 48, last_good, None);
        assert!(got.contains("48h stale"));
        assert!(
            !got.contains("last error"),
            "None last_error must omit the clause entirely, got: {got}"
        );
    }

    // ----- classify_and_execute error-path tests -----------------------
    //
    // These tests do not require a live WorkspaceManager / QueryExecutor
    // with real graph state — the assertions exercise:
    //  * `InvalidArgument` when `path` canonicalisation fails (never
    //    reaches the manager)
    //  * `ToolTimeout` when the `run` closure sleeps past the deadline
    //    (requires a Loaded workspace — we use
    //    `insert_workspace_in_state_for_test` so we do not have to drag
    //    in a `WorkspaceBuilder` from the test crate)
    //  * `Internal` when `run` returns `anyhow::Err`
    //  * `WorkspaceBuildFailed` for a NotReady verdict (workspace in
    //    `Loading` state, never actually loaded)

    fn test_manager() -> Arc<WorkspaceManager> {
        let config = Arc::new(DaemonConfig::default());
        WorkspaceManager::new_without_reaper(config)
    }

    fn test_executor() -> Arc<QueryExecutor> {
        // `PluginManager` not required for these error-path assertions;
        // the closure never reaches the planner.
        Arc::new(QueryExecutor::new())
    }

    #[tokio::test]
    async fn classify_and_execute_invalid_path_returns_invalid_argument() {
        let manager = test_manager();
        let executor = test_executor();

        let run = |_wctx: &sqry_mcp::daemon_adapter::WorkspaceContext,
                   _cancel: &sqry_core::query::cancellation::CancellationToken|
         -> anyhow::Result<Value> { Ok(Value::Null) };
        let err = classify_and_execute(
            manager,
            executor,
            Duration::from_secs(10),
            "/this/path/does/not/exist/for/real",
            run,
        )
        .await
        .expect_err("non-existent path must fail");

        match err {
            DaemonError::InvalidArgument { reason } => {
                assert!(
                    reason.contains("path_policy"),
                    "expected 'path_policy' prefix, got: {reason}"
                );
            }
            other => panic!("expected InvalidArgument, got: {other:?}"),
        }
    }

    #[tokio::test]
    async fn classify_and_execute_notready_returns_workspace_build_failed() {
        // Insert a workspace in `Loading` state → classify_for_serve
        // returns NotReady → classify_and_execute maps to
        // `DaemonError::WorkspaceBuildFailed`.
        use sqry_core::project::{ProjectRootMode, canonicalize_path};

        let tmp = tempfile::tempdir().unwrap();
        let root = canonicalize_path(tmp.path()).unwrap();
        let manager = test_manager();
        let executor = test_executor();

        let key = crate::workspace::WorkspaceKey::new(root.clone(), ProjectRootMode::GitRoot, 0);
        manager.insert_workspace_in_state_for_test(key, crate::workspace::WorkspaceState::Loading);

        let run = |_wctx: &sqry_mcp::daemon_adapter::WorkspaceContext,
                   _cancel: &sqry_core::query::cancellation::CancellationToken|
         -> anyhow::Result<Value> { Ok(Value::Null) };
        let err = classify_and_execute(
            manager,
            executor,
            Duration::from_secs(10),
            root.to_str().unwrap(),
            run,
        )
        .await
        .expect_err("NotReady verdict must fail");

        match err {
            DaemonError::WorkspaceBuildFailed {
                root: got_root,
                reason,
            } => {
                assert_eq!(got_root, root);
                assert!(
                    reason.contains("workspace not ready"),
                    "expected 'workspace not ready' prefix, got: {reason}"
                );
                assert!(
                    reason.contains("Loading"),
                    "expected state Debug in message, got: {reason}"
                );
            }
            other => panic!("expected WorkspaceBuildFailed, got: {other:?}"),
        }
    }

    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn classify_and_execute_timeout_returns_tool_timeout() {
        // Insert a workspace in `Loaded` state → Fresh verdict →
        // `run` sleeps 500ms with tool_timeout=50ms → ToolTimeout
        // fires. Multi-thread flavor needed because spawn_blocking
        // parks a worker thread.
        use sqry_core::project::{ProjectRootMode, canonicalize_path};

        let tmp = tempfile::tempdir().unwrap();
        let root = canonicalize_path(tmp.path()).unwrap();
        let manager = test_manager();
        let executor = test_executor();

        let key = crate::workspace::WorkspaceKey::new(root.clone(), ProjectRootMode::GitRoot, 0);
        manager.insert_workspace_in_state_for_test(key, crate::workspace::WorkspaceState::Loaded);

        let run = |_wctx: &sqry_mcp::daemon_adapter::WorkspaceContext,
                   _cancel: &sqry_core::query::cancellation::CancellationToken|
         -> anyhow::Result<Value> {
            std::thread::sleep(Duration::from_millis(500));
            Ok(Value::Null)
        };
        let err = classify_and_execute(
            manager,
            executor,
            Duration::from_millis(50),
            root.to_str().unwrap(),
            run,
        )
        .await
        .expect_err("timeout must fire");

        match err {
            DaemonError::ToolTimeout {
                root: got_root,
                secs,
                deadline_ms,
            } => {
                assert_eq!(got_root, root);
                // 50ms rounds down to 0s for the secs field; the
                // deadline_ms field captures the real wire value.
                assert_eq!(secs, 0, "50ms rounds down to 0 whole seconds");
                assert_eq!(deadline_ms, 50);
            }
            other => panic!("expected ToolTimeout, got: {other:?}"),
        }
    }

    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn classify_and_execute_internal_error_on_run_failure() {
        use sqry_core::project::{ProjectRootMode, canonicalize_path};

        let tmp = tempfile::tempdir().unwrap();
        let root = canonicalize_path(tmp.path()).unwrap();
        let manager = test_manager();
        let executor = test_executor();

        let key = crate::workspace::WorkspaceKey::new(root.clone(), ProjectRootMode::GitRoot, 0);
        manager.insert_workspace_in_state_for_test(key, crate::workspace::WorkspaceState::Loaded);

        let run = |_wctx: &sqry_mcp::daemon_adapter::WorkspaceContext,
                   _cancel: &sqry_core::query::cancellation::CancellationToken|
         -> anyhow::Result<Value> {
            Err(anyhow::anyhow!("synthetic closure failure"))
        };
        let err = classify_and_execute(
            manager,
            executor,
            Duration::from_secs(10),
            root.to_str().unwrap(),
            run,
        )
        .await
        .expect_err("closure failure must surface");

        match err {
            DaemonError::Internal(inner) => {
                assert!(
                    inner.to_string().contains("synthetic closure failure"),
                    "expected closure error to survive, got: {inner}"
                );
            }
            other => panic!("expected Internal, got: {other:?}"),
        }
    }

    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn classify_and_execute_fresh_path_returns_inner_value() {
        // Positive-path smoke: Loaded workspace + happy closure yields
        // `ExecuteVerdict::Fresh { inner, state: Loaded }`.
        use sqry_core::project::{ProjectRootMode, canonicalize_path};

        let tmp = tempfile::tempdir().unwrap();
        let root = canonicalize_path(tmp.path()).unwrap();
        let manager = test_manager();
        let executor = test_executor();

        let key = crate::workspace::WorkspaceKey::new(root.clone(), ProjectRootMode::GitRoot, 0);
        manager.insert_workspace_in_state_for_test(key, crate::workspace::WorkspaceState::Loaded);

        let run = |_wctx: &sqry_mcp::daemon_adapter::WorkspaceContext,
                   _cancel: &sqry_core::query::cancellation::CancellationToken|
         -> anyhow::Result<Value> { Ok(serde_json::json!({"hello": "world"})) };
        let verdict = classify_and_execute(
            manager,
            executor,
            Duration::from_secs(10),
            root.to_str().unwrap(),
            run,
        )
        .await
        .expect("fresh path must succeed");

        match verdict {
            super::ExecuteVerdict::Fresh { inner, state } => {
                assert_eq!(inner, serde_json::json!({"hello": "world"}));
                assert_eq!(state, crate::workspace::WorkspaceState::Loaded);
            }
            other => panic!("expected Fresh, got: {other:?}"),
        }
    }

    // Sanity: the documented SystemTime round-trip used by
    // `render_stale_warning` is not platform-dependent.
    #[test]
    fn render_stale_warning_epoch_is_well_formed() {
        let got =
            render_stale_warning(std::path::Path::new("/ws"), 0, SystemTime::UNIX_EPOCH, None);
        assert!(got.contains("1970-01-01T00:00:00Z"), "unexpected: {got}");
    }
}