sqry_core/workspace/discovery.rs
1//! Workspace repository discovery utilities.
2//!
3//! Discovery scans a workspace root for repositories that have been indexed
4//! by `sqry index`. The canonical marker is `.sqry/graph/manifest.json` —
5//! the same file that `build_unified_graph_inner` writes (see
6//! `graph/unified/persistence/mod.rs`'s `GRAPH_DIR_NAME` and
//! `MANIFEST_FILE_NAME` constants). The earlier `.sqry-index` placeholder
//! was never written by the live build pipeline, so `discover_repositories`
//! no longer looks for it and has no legacy fallback. (The ancestor walk in
//! `discover_workspace_root` still treats a `.sqry-index` entry as an index
//! hit.) RR-10 Gap #2 retains the per-workspace repository cap to bound
//! walker work regardless of marker name.
11//!
12//! The walker honours `.gitignore` rules and additionally skips a small
13//! set of dependency / build directories whose contents must never be
14//! treated as discoverable repositories even when those directories are
15//! present without a `.gitignore` (e.g. `node_modules`, `target`). The
16//! ignore list is the repo-wide
17//! [`crate::project::path_utils::DEFAULT_IGNORED_DIRS`] (consulted via
18//! [`crate::project::path_utils::is_ignored_dir`]) so workspace discovery
19//! and single-repo project detection share one source of truth.
20
21use std::fs;
22use std::path::{Path, PathBuf};
23
24use ignore::WalkBuilder;
25use thiserror::Error;
26
27use super::error::{WorkspaceError, WorkspaceResult};
28use super::registry::{WorkspaceRepoId, WorkspaceRepository};
29// RR-10 Gap #2: Import repository count limit for DoS prevention
30use crate::config::buffers::max_repositories;
31// Repo-wide source of truth for directories to skip during repo discovery.
32use crate::project::path_utils::is_ignored_dir;
33
/// Canonical marker filename written by `sqry index` under
/// `<repo>/.sqry/graph/`. Discovery treats any file whose name matches this
/// constant and whose parent directory is `.sqry/graph` as evidence of a
/// repository root at the directory that contains that `.sqry` directory.
const MANIFEST_FILE_NAME: &str = "manifest.json";

/// Directory segment containing [`MANIFEST_FILE_NAME`]. Used to validate
/// that a candidate `manifest.json` actually lives inside a sqry graph
/// directory (and not, say, an unrelated NPM `manifest.json`).
const GRAPH_DIR_SEGMENT: &str = "graph";

/// Parent of [`GRAPH_DIR_SEGMENT`]. The full canonical relative path is
/// `.sqry/graph/manifest.json`.
const SQRY_DIR_SEGMENT: &str = ".sqry";
48
/// Discovery strategy for locating repositories within a workspace root.
///
/// Passed to [`discover_repositories`] to decide which marker hits are kept.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiscoveryMode {
    /// Locate repositories by finding `.sqry/graph/manifest.json` markers
    /// anywhere under root.
    IndexFiles,
    /// Only include repositories that are git roots with an index marker.
    /// A git root here means a directory in which `.git` is itself a
    /// directory; a `.git` that is a regular file does not qualify.
    GitRoots,
}
59
60/// Discover repositories beneath `root` according to `mode`.
61///
62/// # Errors
63///
64/// Returns [`WorkspaceError`] when filesystem traversal fails.
65pub fn discover_repositories(
66 root: &Path,
67 mode: DiscoveryMode,
68) -> WorkspaceResult<Vec<WorkspaceRepository>> {
69 let mut repositories = Vec::new();
70
71 let walker = WalkBuilder::new(root)
72 .hidden(false)
73 .ignore(false)
74 .git_ignore(true)
75 .git_exclude(true)
76 .parents(true)
77 .filter_entry(|entry| {
78 // Skip well-known dependency / build directories so discovery
79 // never wastes work descending into them. The ignore list is
80 // owned by `crate::project::path_utils::DEFAULT_IGNORED_DIRS`
81 // and consulted through `is_ignored_dir` to keep workspace
82 // discovery and single-repo project detection in lockstep.
83 !is_ignored_dir(entry.file_name())
84 })
85 .build();
86
87 for result in walker {
88 let entry = match result {
89 Ok(ok) => ok,
90 Err(err) => {
91 let message = err.to_string();
92 let io_err = err
93 .into_io_error()
94 .unwrap_or_else(|| std::io::Error::other(message));
95 return Err(WorkspaceError::Discovery {
96 root: root.to_path_buf(),
97 source: io_err,
98 });
99 }
100 };
101
102 if entry.file_type().is_some_and(|ft| ft.is_dir()) {
103 continue;
104 }
105
106 if entry.file_name() != MANIFEST_FILE_NAME {
107 continue;
108 }
109
110 let manifest_path = entry.into_path();
111
112 // Validate the candidate sits under `<repo>/.sqry/graph/manifest.json`.
113 // Without this guard, any `manifest.json` (e.g. NPM's package
114 // manifest) would be misclassified as a sqry index marker.
115 let Some(graph_dir) = manifest_path.parent() else {
116 continue;
117 };
118 if graph_dir.file_name().and_then(|s| s.to_str()) != Some(GRAPH_DIR_SEGMENT) {
119 continue;
120 }
121 let Some(sqry_dir) = graph_dir.parent() else {
122 continue;
123 };
124 if sqry_dir.file_name().and_then(|s| s.to_str()) != Some(SQRY_DIR_SEGMENT) {
125 continue;
126 }
127 let Some(repo_root) = sqry_dir.parent().map(Path::to_path_buf) else {
128 continue;
129 };
130
131 if matches!(mode, DiscoveryMode::GitRoots) && !repo_root.join(".git").is_dir() {
132 continue;
133 }
134
135 let relative_path = repo_root.strip_prefix(root).unwrap_or(repo_root.as_path());
136 let repo_id = WorkspaceRepoId::new(relative_path);
137 let name = repo_root.file_name().map_or_else(
138 || repo_id.as_str().to_string(),
139 |os| os.to_string_lossy().into_owned(),
140 );
141
142 let metadata = fs::metadata(&manifest_path);
143 let last_indexed_at = metadata.ok().and_then(|meta| meta.modified().ok());
144
145 // RR-10 Gap #2: Enforce repository count limit to prevent DoS via
146 // workspaces containing thousands of indexed repositories.
147 let max_repos = max_repositories();
148 if repositories.len() >= max_repos {
149 return Err(WorkspaceError::TooManyRepositories {
150 found: repositories.len(),
151 limit: max_repos,
152 });
153 }
154
155 repositories.push(WorkspaceRepository::new(
156 repo_id,
157 name,
158 repo_root,
159 manifest_path,
160 last_indexed_at,
161 ));
162 }
163
164 repositories.sort_by(|a, b| a.id.cmp(&b.id));
165 repositories.dedup_by(|a, b| a.id == b.id);
166 Ok(repositories)
167}
168
169// ───────────────────────────────────────────────────────────────────────
170// Ancestor-walk discovery (sqry-mcp flakiness P1, cluster E foundation)
171// ───────────────────────────────────────────────────────────────────────
172//
173// The ancestor-walk discovery below is a **separate concern** from
174// `discover_repositories` above. Where `discover_repositories` walks
175// **down** a workspace root looking for indexed repositories, the
176// ancestor walker walks **up** from a starting path to locate the
177// project boundary that should anchor `.sqry/graph` lookups.
178//
179// Source: `E_p1_cluster.md` §E.1 + `E_p1_cluster.md` §Hand-offs +
180// `00_contracts.md` §3.CC-4. The ancestor walk fixes #237 (workspace
181// discovery and plugin-selection recovery) and gates #239 (workspace
182// artifact hygiene + nested-index creation guard).
183
/// Maximum ancestor walk depth for [`discover_workspace_root`]. Mirrors
/// the `MAX_ANCESTOR_DEPTH = 64` already used by
/// `sqry-core/src/graph/acquisition.rs::find_workspace_root` and
/// `sqry-cli/src/index_discovery.rs::find_nearest_index`. The walk
/// visits at most 64 directories and probes each one with a small,
/// fixed number of existence checks (the graph directory, the legacy
/// index, and each [`PROJECT_MARKERS`] entry) — cheap enough that no
/// caching is required, and small enough that no realistic project
/// layout approaches the limit.
pub const MAX_ANCESTOR_DEPTH: usize = 64;
192
/// Project-boundary marker filenames consulted by
/// [`discover_workspace_root`]. The walker stops at the **first**
/// ancestor containing any of these markers, even if no
/// `.sqry/graph` exists in or above that ancestor. Order does not
/// matter — presence of any marker terminates the walk. A marker
/// counts as present when the path exists, whether it is a file or a
/// directory.
///
/// The five chosen markers cover ~95% of sqry's known user base by
/// language. Expansion to `setup.py`, `Gemfile`, `mix.exs`,
/// `build.gradle`, `pom.xml`, or `composer.json` is deliberately
/// deferred until field reports show false-`None` discoveries in
/// Ruby / Elixir / JVM monorepos (see `E_p1_cluster.md` Open Q4).
pub const PROJECT_MARKERS: &[&str] = &[
    ".git",
    "Cargo.toml",
    "package.json",
    "pyproject.toml",
    "go.mod",
];
211
/// Outcome of [`discover_workspace_root`]. Distinguishes "found a
/// graph inside the project boundary" from "hit the project boundary
/// without seeing a graph on the way up" so callers that *create*
/// indexes (e.g. `sqry index`) can refuse to descend into ancestors of
/// an outer project.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WorkspaceRootDiscovery {
    /// `.sqry/graph` (or legacy `.sqry-index`) found at `root` AT OR
    /// INSIDE the project boundary at `boundary`. `depth` is the
    /// ancestor distance from the discovery starting point to `root`.
    GraphFound {
        /// The directory immediately above `.sqry/graph`. Use this as
        /// the workspace root for graph-load operations.
        root: PathBuf,
        /// The first ancestor containing a [`PROJECT_MARKERS`] entry.
        /// Equal to `root` when the project marker lives at the same
        /// directory as the `.sqry/graph` artifact, and also when no
        /// project marker was found at all.
        boundary: PathBuf,
        /// Number of parent steps from the directory the walk started
        /// in (the parent directory when the walk started at a file)
        /// up to `root`; `0` when the graph sits in that directory.
        depth: usize,
        /// `true` when the walk started at a regular file (we walk up
        /// from its parent); `false` when it started at a directory.
        is_file_scope: bool,
    },
    /// Project boundary reached without seeing any `.sqry/graph`.
    /// Used by callers that want to *create* an index here.
    BoundaryOnly {
        /// The first ancestor containing a [`PROJECT_MARKERS`] entry.
        boundary: PathBuf,
        /// `true` when the walk started at a regular file (we walk up
        /// from its parent); `false` when it started at a directory.
        is_file_scope: bool,
    },
    /// Walked the full [`MAX_ANCESTOR_DEPTH`] (or hit the filesystem
    /// root) without finding either a graph or a project marker.
    /// Caller must require an explicit `--workspace-root`.
    None,
}
250
251/// Walk `start`'s ancestors looking for the canonical project root
252/// for sqry's purposes (per `E_p1_cluster.md` §E.1).
253///
254/// The walker:
255///
256/// 1. Best-effort canonicalises `start` (falls back to `start` on
257/// error).
258/// 2. If `start` is a file, begins from the parent directory.
259/// 3. Walks up to [`MAX_ANCESTOR_DEPTH`] ancestors, recording the
260/// *closest* `.sqry/graph` directory **and** the *closest*
261/// [`PROJECT_MARKERS`] hit. The walk terminates as soon as a
262/// project marker is observed — that marker is the
263/// boundary-of-record even if a graph also exists at a deeper
264/// level.
265/// 4. Maps the `(graph_found, boundary)` pair to the
266/// [`WorkspaceRootDiscovery`] enum:
267/// - graph inside boundary → [`WorkspaceRootDiscovery::GraphFound`].
268/// - graph above boundary (e.g. stray `~/.sqry/graph` while a
269/// `Cargo.toml` lives at `~/work/proj`) →
270/// [`WorkspaceRootDiscovery::BoundaryOnly`] (the outer graph is
271/// discarded).
272/// - graph but no marker → [`WorkspaceRootDiscovery::GraphFound`]
273/// (legacy bare-directory layouts).
274/// - no graph, marker only → [`WorkspaceRootDiscovery::BoundaryOnly`].
275/// - neither → [`WorkspaceRootDiscovery::None`].
276#[must_use]
277pub fn discover_workspace_root(start: &Path) -> WorkspaceRootDiscovery {
278 let canonical = start.canonicalize().unwrap_or_else(|_| start.to_path_buf());
279 let is_file_scope = canonical.is_file();
280 let mut current: PathBuf = if is_file_scope {
281 canonical
282 .parent()
283 .map_or_else(|| canonical.clone(), Path::to_path_buf)
284 } else {
285 canonical
286 };
287
288 let mut graph_found: Option<(PathBuf, usize)> = None;
289 let mut boundary: Option<PathBuf> = None;
290
291 for depth in 0..MAX_ANCESTOR_DEPTH {
292 // (a) Has the *current* directory been indexed?
293 let graph_dir = current.join(".sqry").join("graph");
294 let legacy_index = current.join(".sqry-index");
295 if graph_found.is_none() && (graph_dir.is_dir() || legacy_index.exists()) {
296 graph_found = Some((current.clone(), depth));
297 }
298
299 // (b) Is the *current* directory a project boundary?
300 if boundary.is_none() && PROJECT_MARKERS.iter().any(|m| current.join(m).exists()) {
301 boundary = Some(current.clone());
302 // Project marker terminates the walk — even if no graph
303 // was found, the project root is the boundary-of-record.
304 break;
305 }
306
307 // (c) Walk up.
308 if !current.pop() {
309 break;
310 }
311 }
312
313 match (graph_found, boundary) {
314 (Some((root, depth)), Some(boundary_path)) => {
315 if root.starts_with(&boundary_path) {
316 WorkspaceRootDiscovery::GraphFound {
317 root,
318 boundary: boundary_path,
319 depth,
320 is_file_scope,
321 }
322 } else {
323 // The graph is in an *outer* project (e.g. stray
324 // `~/.sqry/graph` above `~/work/proj/Cargo.toml`).
325 // Discard it — return BoundaryOnly so callers can
326 // build a fresh index inside the project.
327 WorkspaceRootDiscovery::BoundaryOnly {
328 boundary: boundary_path,
329 is_file_scope,
330 }
331 }
332 }
333 (Some((root, depth)), None) => {
334 // Bare-directory legacy layout: no project marker, but a
335 // graph was found. Treat the graph itself as the boundary
336 // to preserve backward compatibility for marker-less
337 // workspaces.
338 WorkspaceRootDiscovery::GraphFound {
339 boundary: root.clone(),
340 root,
341 depth,
342 is_file_scope,
343 }
344 }
345 (None, Some(boundary_path)) => WorkspaceRootDiscovery::BoundaryOnly {
346 boundary: boundary_path,
347 is_file_scope,
348 },
349 (None, None) => WorkspaceRootDiscovery::None,
350 }
351}
352
/// Error returned by [`assert_no_ancestor_graph`] when a nested
/// `.sqry/` would be created inside an outer project that already has
/// one. Rendered with the full recovery template (per
/// `E_p1_cluster.md` §E.3) so the user sees the offending path, the
/// ancestor's graph location, and the project boundary together.
#[derive(Debug, Clone, Error)]
pub enum NestedIndexError {
    // The message names all three paths and offers both ways out:
    // `--allow-nested`, or re-running from the project boundary.
    #[error(
        "refusing to create a nested .sqry/ index.\n\
         An ancestor index already exists at: {ancestor_graph}\n\
         Requested location: {requested}\n\
         Project boundary detected at: {boundary}\n\
         \n\
         If this is intentional (e.g. a sub-project with its own graph), \
         re-run with --allow-nested.\n\
         Otherwise: cd to the project root ({boundary}) and run \
         `sqry update` (incremental) or `sqry index --force` (rebuild).",
        ancestor_graph = ancestor_graph.display(),
        requested = requested.display(),
        boundary = boundary.display(),
    )]
    /// Nested-index pollution: an index already exists at an ancestor
    /// of `requested`, and they share the same project boundary. The
    /// recovery message identifies all three paths.
    AncestorExists {
        /// The path the caller asked to index (canonicalised).
        requested: PathBuf,
        /// Location of the ancestor index artifact (normally the outer
        /// project's `.sqry/graph` directory).
        ancestor_graph: PathBuf,
        /// The first ancestor containing a [`PROJECT_MARKERS`] entry.
        boundary: PathBuf,
    },
}
386
387/// Returns `Ok(())` iff creating a `.sqry/graph` under `requested` is
388/// safe — i.e. there is no ancestor graph belonging to the same
389/// project boundary, OR the caller passed `allow_nested = true`.
390///
391/// Called by `sqry-cli/src/commands/index.rs::run_index` and by
392/// `FilesystemGraphProvider::acquire`'s
393/// `MissingGraphPolicy::AutoBuildIfEnabled` branch (cluster-E
394/// Layer-2).
395///
396/// The condition `requested.starts_with(&boundary)` is the
397/// load-bearing predicate: a graph at `~/.sqry/graph` and a project
398/// at `~/work/proj/.sqry/graph` do NOT share a boundary (the
399/// project's `Cargo.toml` is at `~/work/proj`, not at `~`), so the
400/// guard does not fire for the legitimate "different project" case.
401/// It fires only for the "same project, nested location" case.
402///
403/// # Errors
404///
405/// Returns [`NestedIndexError::AncestorExists`] when an ancestor
406/// graph belongs to the same project boundary as `requested`.
407pub fn assert_no_ancestor_graph(
408 requested: &Path,
409 allow_nested: bool,
410) -> Result<(), NestedIndexError> {
411 if allow_nested {
412 return Ok(());
413 }
414 // Canonicalise *before* delegating so a relative path like `.`
415 // resolves to an absolute path. If `canonicalize` fails (path does
416 // not exist yet), fall back to joining against the caller's cwd —
417 // this preserves the "act on the caller's directory, not the
418 // process's" intent for sub-process and test-harness invocations
419 // where `current.canonicalize()` would walk into an unrelated
420 // ancestor.
421 let canonical_requested = canonicalise_or_join_cwd(requested);
422 // Refuse to evaluate against a still-relative or empty path —
423 // discover_workspace_root would walk `current.join(".sqry/graph")`
424 // relative to the process cwd, producing the cluster-E §E.3
425 // spurious-error mode reported on 2026-05-10.
426 if !canonical_requested.is_absolute() || canonical_requested.as_os_str().is_empty() {
427 return Ok(());
428 }
429 if let WorkspaceRootDiscovery::GraphFound { root, boundary, .. } =
430 discover_workspace_root(&canonical_requested)
431 && canonical_requested != root
432 && canonical_requested.starts_with(&boundary)
433 {
434 return Err(NestedIndexError::AncestorExists {
435 requested: canonical_requested,
436 ancestor_graph: root.join(".sqry").join("graph"),
437 boundary,
438 });
439 }
440 Ok(())
441}
442
/// Resolve `path` to the most canonical absolute form available, even
/// when `path` does not exist on disk yet.
///
/// The `assert_no_ancestor_graph` caller is by definition about to
/// create the directory, so a strict canonicalise would fail in the
/// happy path. Resolution therefore proceeds in steps:
///
/// 1. If `path` canonicalises, return that.
/// 2. Otherwise make it absolute (relative paths are joined onto the
///    process's current directory).
/// 3. Canonicalise the deepest ancestor that does exist and re-append
///    the missing trailing components. This resolves `..` segments and
///    symlinked ancestors in the existing part of the path, so a later
///    lexical ancestor walk only visits real ancestors of the target.
/// 4. If nothing can be canonicalised (or a missing component is `..`),
///    return the absolute path from step 2 unchanged.
///
/// When the current directory cannot be determined, a relative `path`
/// is returned as-is (still relative).
fn canonicalise_or_join_cwd(path: &Path) -> PathBuf {
    if let Ok(canon) = path.canonicalize() {
        return canon;
    }

    let absolute = if path.is_absolute() {
        path.to_path_buf()
    } else {
        match std::env::current_dir() {
            Ok(cwd) => cwd.join(path),
            Err(_) => return path.to_path_buf(),
        }
    };

    // Peel missing components off the end until an existing ancestor
    // canonicalises, then put the peeled components back in order.
    let mut missing_tail = Vec::new();
    let mut probe: &Path = &absolute;
    loop {
        if let Ok(mut resolved) = probe.canonicalize() {
            for name in missing_tail.iter().rev() {
                resolved.push(name);
            }
            return resolved;
        }
        // `file_name` is `None` for a trailing `..` and `parent` is
        // `None` at the root; either way there is nothing safe left to
        // peel, so fall back to the uncanonicalised absolute path.
        let (Some(parent), Some(name)) = (probe.parent(), probe.file_name()) else {
            break;
        };
        missing_tail.push(name);
        probe = parent;
    }
    absolute
}
461
462// ───────────────────────────────────────────────────────────────────────
463// `WorkspaceCleanReport` types (sqry-mcp flakiness P1, cluster E
464// foundation — `E_p1_cluster.md` §E.4 + Hand-off E4)
465// ───────────────────────────────────────────────────────────────────────
466//
467// Public types for `sqry workspace clean`'s `--json` output. Stable
468// across patch releases inside `schema_version 1`; additive changes
469// only. Cluster-E Layer-2 owns the discovery + serialization logic;
470// foundation only owns the type shape.
471
/// Top-level JSON shape produced by `sqry workspace clean --json`.
/// `schema_version 1` is the wire contract for this release; future
/// additive fields can be added without bumping the version, but a
/// breaking field rename or removal must increment to `schema_version 2`.
///
/// The type is serialise-only: it derives `serde::Serialize` and has no
/// `Deserialize` implementation here.
#[derive(Debug, Clone, serde::Serialize)]
pub struct WorkspaceCleanReport {
    /// Always `1` for this release.
    pub schema_version: u32,
    /// Canonicalised root path the cleanup walked.
    pub root: PathBuf,
    /// The `.sqry/graph` at the project boundary for `root`, if any.
    /// Populated by [`discover_workspace_root`]; never auto-deleted
    /// without `--force`.
    pub canonical_active_artifact: Option<PathBuf>,
    /// Daemon-locked artifacts surfaced via the `daemon/active-artifacts`
    /// IPC method. Empty when the daemon is unreachable; the JSON
    /// envelope additionally surfaces a fallback warning in that
    /// case (cluster-E Layer-2 owns the warning shape).
    pub daemon_locked_artifacts: Vec<PathBuf>,
    /// Every artifact the cleanup walk found, classified.
    pub discovered: Vec<DiscoveredArtifact>,
    /// Paths from `discovered` that the policy filter is willing to
    /// remove (post `is_canonical_active` / `is_daemon_locked` /
    /// `is_user_state` filtering).
    pub planned_removals: Vec<PathBuf>,
    /// Discovered but not planned for removal, with the reason.
    pub skipped: Vec<SkippedArtifact>,
    /// Mirrors the `--apply` flag.
    pub applied: bool,
    /// Empty when `applied = false`; otherwise the actually-removed
    /// paths from `planned_removals` (subset on per-entry I/O error).
    pub removed: Vec<PathBuf>,
    /// Per-entry removal failures during `--apply`.
    pub errors: Vec<RemovalError>,
}
507
/// A single artifact discovered by the cleanup walk, with the
/// classification + size + freshness data needed by the dry-run
/// summary. The three `is_*` flags are the inputs to the policy
/// filter described on [`ArtifactKind`].
#[derive(Debug, Clone, serde::Serialize)]
pub struct DiscoveredArtifact {
    /// Canonicalised absolute path.
    pub path: PathBuf,
    /// Classification of this artifact (graph / cache / user state / ...).
    pub kind: ArtifactKind,
    /// Sum of all files within (capped at 10 MiB sample for cache
    /// directories with millions of entries; see `E_p1_cluster.md`
    /// §E.4 step 5c).
    pub size_bytes: u64,
    /// Last-modified time of the artifact root (best-effort; `None`
    /// when the filesystem does not expose mtime).
    pub last_modified: Option<chrono::DateTime<chrono::Utc>>,
    /// `true` when this is the project's canonical
    /// `.sqry/graph` artifact — never auto-deleted.
    pub is_canonical_active: bool,
    /// `true` when the daemon currently has this artifact loaded
    /// (per the `daemon/active-artifacts` IPC method).
    pub is_daemon_locked: bool,
    /// `true` when this is `.sqry-index.user` — user-curated state
    /// hidden behind `--include-user-state`.
    pub is_user_state: bool,
}
534
/// Classification of a discovered artifact. The kind drives the
/// policy filter:
///
/// | Kind                | Default behaviour without flags |
/// |---------------------|---------------------------------|
/// | `Graph`             | Skipped if `is_canonical_active` (or `is_daemon_locked`) without `--force` |
/// | `GraphRoot`         | Same as `Graph` |
/// | `Cache`             | Removed |
/// | `Prof`              | Removed |
/// | `UserState`         | Skipped without `--include-user-state` |
/// | `LegacyIndex`       | Removed |
/// | `WorkspaceRegistry` | Always skipped (never auto-deleted) |
/// | `NestedGraph`       | Removed unless `is_daemon_locked` |
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
pub enum ArtifactKind {
    /// `<root>/.sqry/graph` — canonical unified graph snapshot.
    Graph,
    /// `<root>/.sqry/` — parent of `Graph`. Listed separately so the
    /// dry-run can show "graph + cache + manifest" in one entry.
    GraphRoot,
    /// `<root>/.sqry-cache` — incremental indexer cache.
    Cache,
    /// `<root>/.sqry-prof` — profiler dumps (legacy / external).
    Prof,
    /// `<root>/.sqry-index.user` — user-curated state (aliases,
    /// recent queries). Not removed unless `--include-user-state`
    /// is passed.
    UserState,
    /// `<root>/.sqry-index` — legacy v1 index marker file. Stale
    /// since v2.0.0; safe to delete unconditionally.
    LegacyIndex,
    /// `<root>/.sqry-workspace` — multi-repo registry. NEVER
    /// auto-deleted.
    WorkspaceRegistry,
    /// `.sqry/graph` discovered inside an outer project that already
    /// has its own canonical graph (E.3 nested-index pollution).
    NestedGraph,
}
572
/// An artifact that was discovered but not planned for removal —
/// `reason` carries the policy verdict so the dry-run output can
/// explain why.
#[derive(Debug, Clone, serde::Serialize)]
pub struct SkippedArtifact {
    /// Canonicalised path of the skipped artifact.
    pub path: PathBuf,
    /// Why the policy filter skipped this entry.
    pub reason: SkipReason,
}
582
/// Why a discovered artifact was left out of `planned_removals`.
/// Carried by [`SkippedArtifact::reason`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
pub enum SkipReason {
    /// Path equals the project's current canonical
    /// `.sqry/graph` artifact.
    CanonicalActive,
    /// Daemon currently has the artifact loaded.
    DaemonLocked,
    /// `.sqry-index.user` without `--include-user-state`.
    UserState,
    /// `.sqry-workspace` registry — never auto-deleted.
    WorkspaceRegistry,
    /// Symlink that the walker refused to follow.
    SymlinkRefused,
    /// Path canonicalised outside the cleanup `root`.
    OutsideRoot,
}
600
/// Per-entry removal failure (e.g. `EACCES` on `remove_dir_all`).
/// The underlying error is stored as text so the report stays
/// `Clone` and serialisable.
#[derive(Debug, Clone, serde::Serialize)]
pub struct RemovalError {
    /// Canonicalised path the cleanup tried to remove.
    pub path: PathBuf,
    /// `Display` form of the underlying I/O error.
    pub error: String,
}
609
#[cfg(test)]
mod ancestor_tests {
    use super::*;
    use tempfile::TempDir;

    /// Drops a minimal `Cargo.toml` project marker into `dir`.
    fn mark_as_cargo_project(dir: &std::path::Path) {
        std::fs::write(dir.join("Cargo.toml"), "[package]\n").unwrap();
    }

    /// Creates an empty `.sqry/graph` directory under `dir`, creating
    /// `dir` itself when needed.
    fn add_graph_dir(dir: &std::path::Path) {
        std::fs::create_dir_all(dir.join(".sqry").join("graph")).unwrap();
    }

    /// Canonicalises `path`; the path must exist.
    fn canon(path: &std::path::Path) -> std::path::PathBuf {
        path.canonicalize().unwrap()
    }

    /// With neither markers nor graphs in a freshly created hierarchy,
    /// discovery must never claim to have found a graph.
    #[test]
    fn discover_returns_none_for_empty_hierarchy() {
        let tmp = TempDir::new().unwrap();
        let leaf = tmp.path().join("a/b/c");
        std::fs::create_dir_all(&leaf).unwrap();

        let outcome = discover_workspace_root(&leaf);

        // `None` is the usual result. `BoundaryOnly` is tolerated
        // because a directory above the temp dir may happen to host a
        // marker such as `.git`. Only `GraphFound` is wrong here.
        let found_graph = matches!(outcome, WorkspaceRootDiscovery::GraphFound { .. });
        assert!(
            !found_graph,
            "no .sqry/graph above leaf, expected None or BoundaryOnly, got {outcome:?}"
        );
    }

    /// A `Cargo.toml` at the project root stops the walk; with no graph
    /// present the outcome is `BoundaryOnly` at that root.
    #[test]
    fn discover_stops_at_cargo_toml_marker_with_no_graph() {
        let tmp = TempDir::new().unwrap();
        let proj = tmp.path().join("proj");
        let sub = proj.join("sub/deep");
        std::fs::create_dir_all(&sub).unwrap();
        mark_as_cargo_project(&proj);

        match discover_workspace_root(&sub) {
            WorkspaceRootDiscovery::BoundaryOnly { boundary, .. } => {
                assert_eq!(
                    canon(&boundary),
                    canon(&proj),
                    "boundary must equal proj root"
                );
            }
            other => panic!("expected BoundaryOnly, got {other:?}"),
        }
    }

    /// A `.sqry/graph` next to the project marker yields `GraphFound`
    /// with `root` and `boundary` both at the project root.
    #[test]
    fn discover_returns_graph_found_when_graph_inside_boundary() {
        let tmp = TempDir::new().unwrap();
        let proj = tmp.path().join("proj");
        let sub = proj.join("sub");
        std::fs::create_dir_all(&sub).unwrap();
        mark_as_cargo_project(&proj);
        add_graph_dir(&proj);

        match discover_workspace_root(&sub) {
            WorkspaceRootDiscovery::GraphFound { root, boundary, .. } => {
                let expected = canon(&proj);
                assert_eq!(canon(&root), expected);
                assert_eq!(canon(&boundary), expected);
            }
            other => panic!("expected GraphFound, got {other:?}"),
        }
    }

    /// A stray graph *above* the project boundary must not produce
    /// `GraphFound` — the inner project marker wins. (The exact
    /// reproducer from `E_p1_cluster.md` §E.1 "stray ~/.sqry/graph".)
    #[test]
    fn discover_discards_outer_graph_when_inner_marker_exists() {
        let tmp = TempDir::new().unwrap();
        let outer = tmp.path();
        add_graph_dir(outer);
        let proj = outer.join("work/new-project");
        std::fs::create_dir_all(&proj).unwrap();
        mark_as_cargo_project(&proj);

        match discover_workspace_root(&proj) {
            WorkspaceRootDiscovery::BoundaryOnly { boundary, .. } => {
                assert_eq!(
                    canon(&boundary),
                    canon(&proj),
                    "boundary should be the inner project root, not the outer stray graph"
                );
            }
            other => {
                panic!("outer-graph + inner-marker must collapse to BoundaryOnly, got {other:?}")
            }
        }
    }

    /// Without `allow_nested`, asking to index a sub-directory of a
    /// project that already has a graph is rejected.
    #[test]
    fn assert_no_ancestor_graph_rejects_nested_creation() {
        let tmp = TempDir::new().unwrap();
        let proj = tmp.path().join("proj");
        add_graph_dir(&proj);
        mark_as_cargo_project(&proj);
        let nested = proj.join("sub");
        std::fs::create_dir_all(&nested).unwrap();

        let err = assert_no_ancestor_graph(&nested, false)
            .expect_err("nested creation must error when ancestor graph exists");
        assert!(matches!(err, NestedIndexError::AncestorExists { .. }));
    }

    /// `allow_nested = true` bypasses the guard for the same layout.
    #[test]
    fn assert_no_ancestor_graph_passes_with_allow_nested() {
        let tmp = TempDir::new().unwrap();
        let proj = tmp.path().join("proj");
        add_graph_dir(&proj);
        mark_as_cargo_project(&proj);
        let nested = proj.join("sub");
        std::fs::create_dir_all(&nested).unwrap();

        assert!(assert_no_ancestor_graph(&nested, true).is_ok());
    }
}