Skip to main content

sqry_core/workspace/
discovery.rs

1//! Workspace repository discovery utilities.
2//!
3//! Discovery scans a workspace root for repositories that have been indexed
4//! by `sqry index`. The canonical marker is `.sqry/graph/manifest.json` —
5//! the same file that `build_unified_graph_inner` writes (see
6//! `graph/unified/persistence/mod.rs`'s `GRAPH_DIR_NAME` and
7//! `MANIFEST_FILE_NAME` constants). The earlier `.sqry-index` placeholder
8//! was never written by the live build pipeline and is removed outright;
9//! there is no legacy fallback (RR-10 Gap #2 retains the per-workspace
10//! repository cap to bound walker work regardless of marker name).
11//!
12//! The walker honours `.gitignore` rules and additionally skips a small
13//! set of dependency / build directories whose contents must never be
14//! treated as discoverable repositories even when those directories are
15//! present without a `.gitignore` (e.g. `node_modules`, `target`). The
16//! ignore list is the repo-wide
17//! [`crate::project::path_utils::DEFAULT_IGNORED_DIRS`] (consulted via
18//! [`crate::project::path_utils::is_ignored_dir`]) so workspace discovery
19//! and single-repo project detection share one source of truth.
20
21use std::fs;
22use std::path::{Path, PathBuf};
23
24use ignore::WalkBuilder;
25use thiserror::Error;
26
27use super::error::{WorkspaceError, WorkspaceResult};
28use super::registry::{WorkspaceRepoId, WorkspaceRepository};
29// RR-10 Gap #2: Import repository count limit for DoS prevention
30use crate::config::buffers::max_repositories;
31// Repo-wide source of truth for directories to skip during repo discovery.
32use crate::project::path_utils::is_ignored_dir;
33
/// Canonical marker filename written by `sqry index` under
/// `<repo>/.sqry/graph/`. Discovery treats any file whose name matches this
/// constant and whose parent directory is `.sqry/graph` as evidence of a
/// repository root at the directory that contains `.sqry` (i.e. the
/// parent of the marker's grandparent directory).
const MANIFEST_FILE_NAME: &str = "manifest.json";

/// Directory segment containing [`MANIFEST_FILE_NAME`]. Used to validate
/// that a candidate `manifest.json` actually lives inside a sqry graph
/// directory (and not, say, an unrelated NPM `manifest.json`).
const GRAPH_DIR_SEGMENT: &str = "graph";

/// Parent of [`GRAPH_DIR_SEGMENT`]. The full canonical relative path is
/// `.sqry/graph/manifest.json`.
const SQRY_DIR_SEGMENT: &str = ".sqry";
48
/// Discovery strategy for locating repositories within a workspace root.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiscoveryMode {
    /// Locate repositories by finding `.sqry/graph/manifest.json` markers
    /// anywhere under root.
    IndexFiles,
    /// Only include repositories that are git roots (contain `.git/`) with
    /// an index marker. `discover_repositories` requires `.git` to be a
    /// *directory*, so a checkout whose `.git` is a regular file is not
    /// treated as a git root in this mode.
    GitRoots,
}
59
60/// Discover repositories beneath `root` according to `mode`.
61///
62/// # Errors
63///
64/// Returns [`WorkspaceError`] when filesystem traversal fails.
65pub fn discover_repositories(
66    root: &Path,
67    mode: DiscoveryMode,
68) -> WorkspaceResult<Vec<WorkspaceRepository>> {
69    let mut repositories = Vec::new();
70
71    let walker = WalkBuilder::new(root)
72        .hidden(false)
73        .ignore(false)
74        .git_ignore(true)
75        .git_exclude(true)
76        .parents(true)
77        .filter_entry(|entry| {
78            // Skip well-known dependency / build directories so discovery
79            // never wastes work descending into them. The ignore list is
80            // owned by `crate::project::path_utils::DEFAULT_IGNORED_DIRS`
81            // and consulted through `is_ignored_dir` to keep workspace
82            // discovery and single-repo project detection in lockstep.
83            !is_ignored_dir(entry.file_name())
84        })
85        .build();
86
87    for result in walker {
88        let entry = match result {
89            Ok(ok) => ok,
90            Err(err) => {
91                let message = err.to_string();
92                let io_err = err
93                    .into_io_error()
94                    .unwrap_or_else(|| std::io::Error::other(message));
95                return Err(WorkspaceError::Discovery {
96                    root: root.to_path_buf(),
97                    source: io_err,
98                });
99            }
100        };
101
102        if entry.file_type().is_some_and(|ft| ft.is_dir()) {
103            continue;
104        }
105
106        if entry.file_name() != MANIFEST_FILE_NAME {
107            continue;
108        }
109
110        let manifest_path = entry.into_path();
111
112        // Validate the candidate sits under `<repo>/.sqry/graph/manifest.json`.
113        // Without this guard, any `manifest.json` (e.g. NPM's package
114        // manifest) would be misclassified as a sqry index marker.
115        let Some(graph_dir) = manifest_path.parent() else {
116            continue;
117        };
118        if graph_dir.file_name().and_then(|s| s.to_str()) != Some(GRAPH_DIR_SEGMENT) {
119            continue;
120        }
121        let Some(sqry_dir) = graph_dir.parent() else {
122            continue;
123        };
124        if sqry_dir.file_name().and_then(|s| s.to_str()) != Some(SQRY_DIR_SEGMENT) {
125            continue;
126        }
127        let Some(repo_root) = sqry_dir.parent().map(Path::to_path_buf) else {
128            continue;
129        };
130
131        if matches!(mode, DiscoveryMode::GitRoots) && !repo_root.join(".git").is_dir() {
132            continue;
133        }
134
135        let relative_path = repo_root.strip_prefix(root).unwrap_or(repo_root.as_path());
136        let repo_id = WorkspaceRepoId::new(relative_path);
137        let name = repo_root.file_name().map_or_else(
138            || repo_id.as_str().to_string(),
139            |os| os.to_string_lossy().into_owned(),
140        );
141
142        let metadata = fs::metadata(&manifest_path);
143        let last_indexed_at = metadata.ok().and_then(|meta| meta.modified().ok());
144
145        // RR-10 Gap #2: Enforce repository count limit to prevent DoS via
146        // workspaces containing thousands of indexed repositories.
147        let max_repos = max_repositories();
148        if repositories.len() >= max_repos {
149            return Err(WorkspaceError::TooManyRepositories {
150                found: repositories.len(),
151                limit: max_repos,
152            });
153        }
154
155        repositories.push(WorkspaceRepository::new(
156            repo_id,
157            name,
158            repo_root,
159            manifest_path,
160            last_indexed_at,
161        ));
162    }
163
164    repositories.sort_by(|a, b| a.id.cmp(&b.id));
165    repositories.dedup_by(|a, b| a.id == b.id);
166    Ok(repositories)
167}
168
169// ───────────────────────────────────────────────────────────────────────
170// Ancestor-walk discovery (sqry-mcp flakiness P1, cluster E foundation)
171// ───────────────────────────────────────────────────────────────────────
172//
173// The ancestor-walk discovery below is a **separate concern** from
174// `discover_repositories` above. Where `discover_repositories` walks
175// **down** a workspace root looking for indexed repositories, the
176// ancestor walker walks **up** from a starting path to locate the
177// project boundary that should anchor `.sqry/graph` lookups.
178//
179// Source: `E_p1_cluster.md` §E.1 + `E_p1_cluster.md` §Hand-offs +
180// `00_contracts.md` §3.CC-4. The ancestor walk fixes #237 (workspace
181// discovery and plugin-selection recovery) and gates #239 (workspace
182// artifact hygiene + nested-index creation guard).
183
/// Maximum ancestor walk depth for [`discover_workspace_root`]. Mirrors
/// the `MAX_ANCESTOR_DEPTH = 64` already used by
/// `sqry-core/src/graph/acquisition.rs::find_workspace_root` and
/// `sqry-cli/src/index_discovery.rs::find_nearest_index`. The walk
/// examines at most 64 directory levels, probing each one with a small,
/// fixed number of existence checks (the graph directory, the legacy
/// index marker, and each [`PROJECT_MARKERS`] entry) — cheap enough that
/// no caching is required, and small enough that no realistic project
/// layout approaches the limit.
pub const MAX_ANCESTOR_DEPTH: usize = 64;

/// Project-boundary marker filenames consulted by
/// [`discover_workspace_root`]. The walker stops at the **first**
/// ancestor containing any of these markers, even if no
/// `.sqry/graph` exists in or above that ancestor. Order does not
/// matter — presence of any marker terminates the walk. Markers are
/// tested with a plain existence check, so a marker may be either a
/// file or a directory.
///
/// The five chosen markers cover ~95% of sqry's known user base by
/// language. Expansion to `setup.py`, `Gemfile`, `mix.exs`,
/// `build.gradle`, `pom.xml`, or `composer.json` is deliberately
/// deferred until field reports show false-`None` discoveries in
/// Ruby / Elixir / JVM monorepos (see `E_p1_cluster.md` Open Q4).
pub const PROJECT_MARKERS: &[&str] = &[
    ".git",
    "Cargo.toml",
    "package.json",
    "pyproject.toml",
    "go.mod",
];
211
/// Outcome of [`discover_workspace_root`]. Distinguishes "found a
/// graph inside the project boundary" from "hit the project boundary
/// with no graph above it" so callers that *create* indexes (e.g.
/// `sqry index`) can refuse to descend into ancestors of an outer
/// project.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WorkspaceRootDiscovery {
    /// `.sqry/graph` (or legacy `.sqry-index`) found at `root` AT OR
    /// INSIDE the project boundary at `boundary`. `depth` is the
    /// ancestor distance from the discovery starting point to `root`.
    GraphFound {
        /// The directory immediately above `.sqry/graph`. Use this as
        /// the workspace root for graph-load operations.
        root: PathBuf,
        /// The first ancestor containing a [`PROJECT_MARKERS`] entry.
        /// Equal to `root` when the project marker lives at the same
        /// directory as the `.sqry/graph` artifact, and also when no
        /// project marker was found at all (marker-less layout), in
        /// which case the graph's own directory doubles as the boundary.
        boundary: PathBuf,
        /// Number of directory levels between the first directory
        /// examined and `root` (`0` when the graph lives in that first
        /// directory). When discovery starts at a regular file, the
        /// first directory examined is the file's parent.
        depth: usize,
        /// `true` when the walk started at a regular file (we walk up
        /// from its parent); `false` when it started at a directory.
        is_file_scope: bool,
    },
    /// Project boundary reached without seeing any `.sqry/graph`.
    /// Used by callers that want to *create* an index here.
    BoundaryOnly {
        /// The first ancestor containing a [`PROJECT_MARKERS`] entry.
        boundary: PathBuf,
        /// `true` when the walk started at a regular file (we walk up
        /// from its parent); `false` when it started at a directory.
        is_file_scope: bool,
    },
    /// Walked the full [`MAX_ANCESTOR_DEPTH`] (or hit the filesystem
    /// root) without finding either a graph or a project marker.
    /// Caller must require an explicit `--workspace-root`.
    None,
}
250
251/// Walk `start`'s ancestors looking for the canonical project root
252/// for sqry's purposes (per `E_p1_cluster.md` §E.1).
253///
254/// The walker:
255///
256/// 1. Best-effort canonicalises `start` (falls back to `start` on
257///    error).
258/// 2. If `start` is a file, begins from the parent directory.
259/// 3. Walks up to [`MAX_ANCESTOR_DEPTH`] ancestors, recording the
260///    *closest* `.sqry/graph` directory **and** the *closest*
261///    [`PROJECT_MARKERS`] hit. The walk terminates as soon as a
262///    project marker is observed — that marker is the
263///    boundary-of-record even if a graph also exists at a deeper
264///    level.
265/// 4. Maps the `(graph_found, boundary)` pair to the
266///    [`WorkspaceRootDiscovery`] enum:
267///    - graph inside boundary → [`WorkspaceRootDiscovery::GraphFound`].
268///    - graph above boundary (e.g. stray `~/.sqry/graph` while a
269///      `Cargo.toml` lives at `~/work/proj`) →
270///      [`WorkspaceRootDiscovery::BoundaryOnly`] (the outer graph is
271///      discarded).
272///    - graph but no marker → [`WorkspaceRootDiscovery::GraphFound`]
273///      (legacy bare-directory layouts).
274///    - no graph, marker only → [`WorkspaceRootDiscovery::BoundaryOnly`].
275///    - neither → [`WorkspaceRootDiscovery::None`].
276#[must_use]
277pub fn discover_workspace_root(start: &Path) -> WorkspaceRootDiscovery {
278    let canonical = start.canonicalize().unwrap_or_else(|_| start.to_path_buf());
279    let is_file_scope = canonical.is_file();
280    let mut current: PathBuf = if is_file_scope {
281        canonical
282            .parent()
283            .map_or_else(|| canonical.clone(), Path::to_path_buf)
284    } else {
285        canonical
286    };
287
288    let mut graph_found: Option<(PathBuf, usize)> = None;
289    let mut boundary: Option<PathBuf> = None;
290
291    for depth in 0..MAX_ANCESTOR_DEPTH {
292        // (a) Has the *current* directory been indexed?
293        let graph_dir = current.join(".sqry").join("graph");
294        let legacy_index = current.join(".sqry-index");
295        if graph_found.is_none() && (graph_dir.is_dir() || legacy_index.exists()) {
296            graph_found = Some((current.clone(), depth));
297        }
298
299        // (b) Is the *current* directory a project boundary?
300        if boundary.is_none() && PROJECT_MARKERS.iter().any(|m| current.join(m).exists()) {
301            boundary = Some(current.clone());
302            // Project marker terminates the walk — even if no graph
303            // was found, the project root is the boundary-of-record.
304            break;
305        }
306
307        // (c) Walk up.
308        if !current.pop() {
309            break;
310        }
311    }
312
313    match (graph_found, boundary) {
314        (Some((root, depth)), Some(boundary_path)) => {
315            if root.starts_with(&boundary_path) {
316                WorkspaceRootDiscovery::GraphFound {
317                    root,
318                    boundary: boundary_path,
319                    depth,
320                    is_file_scope,
321                }
322            } else {
323                // The graph is in an *outer* project (e.g. stray
324                // `~/.sqry/graph` above `~/work/proj/Cargo.toml`).
325                // Discard it — return BoundaryOnly so callers can
326                // build a fresh index inside the project.
327                WorkspaceRootDiscovery::BoundaryOnly {
328                    boundary: boundary_path,
329                    is_file_scope,
330                }
331            }
332        }
333        (Some((root, depth)), None) => {
334            // Bare-directory legacy layout: no project marker, but a
335            // graph was found. Treat the graph itself as the boundary
336            // to preserve backward compatibility for marker-less
337            // workspaces.
338            WorkspaceRootDiscovery::GraphFound {
339                boundary: root.clone(),
340                root,
341                depth,
342                is_file_scope,
343            }
344        }
345        (None, Some(boundary_path)) => WorkspaceRootDiscovery::BoundaryOnly {
346            boundary: boundary_path,
347            is_file_scope,
348        },
349        (None, None) => WorkspaceRootDiscovery::None,
350    }
351}
352
/// Error returned by [`assert_no_ancestor_graph`] when a nested
/// `.sqry/` would be created inside an outer project that already has
/// one. Rendered with the full recovery template (per
/// `E_p1_cluster.md` §E.3) so the user sees the offending path, the
/// ancestor's graph location, and the project boundary together.
#[derive(Debug, Clone, Error)]
pub enum NestedIndexError {
    // The rendered message is multi-line; `{boundary}` is interpolated
    // twice (once in the path listing, once in the recovery hint).
    #[error(
        "refusing to create a nested .sqry/ index.\n\
         An ancestor index already exists at: {ancestor_graph}\n\
         Requested location:                  {requested}\n\
         Project boundary detected at:        {boundary}\n\
         \n\
         If this is intentional (e.g. a sub-project with its own graph), \
         re-run with --allow-nested.\n\
         Otherwise: cd to the project root ({boundary}) and run \
         `sqry update` (incremental) or `sqry index --force` (rebuild).",
        ancestor_graph = ancestor_graph.display(),
        requested = requested.display(),
        boundary = boundary.display(),
    )]
    /// Nested-index pollution: a `.sqry/graph` already exists at an
    /// ancestor of `requested`, and they share the same project
    /// boundary. The recovery message identifies all three paths.
    AncestorExists {
        /// The path the caller asked to index (canonicalised).
        requested: PathBuf,
        /// The `.sqry/graph` directory belonging to the outer project.
        /// [`assert_no_ancestor_graph`] always builds this as
        /// `<root>/.sqry/graph`, including when the ancestor was detected
        /// through the legacy `.sqry-index` marker — so the path is not
        /// guaranteed to exist on disk in that case.
        ancestor_graph: PathBuf,
        /// The first ancestor containing a [`PROJECT_MARKERS`] entry.
        boundary: PathBuf,
    },
}
386
387/// Returns `Ok(())` iff creating a `.sqry/graph` under `requested` is
388/// safe — i.e. there is no ancestor graph belonging to the same
389/// project boundary, OR the caller passed `allow_nested = true`.
390///
391/// Called by `sqry-cli/src/commands/index.rs::run_index` and by
392/// `FilesystemGraphProvider::acquire`'s
393/// `MissingGraphPolicy::AutoBuildIfEnabled` branch (cluster-E
394/// Layer-2).
395///
396/// The condition `requested.starts_with(&boundary)` is the
397/// load-bearing predicate: a graph at `~/.sqry/graph` and a project
398/// at `~/work/proj/.sqry/graph` do NOT share a boundary (the
399/// project's `Cargo.toml` is at `~/work/proj`, not at `~`), so the
400/// guard does not fire for the legitimate "different project" case.
401/// It fires only for the "same project, nested location" case.
402///
403/// # Errors
404///
405/// Returns [`NestedIndexError::AncestorExists`] when an ancestor
406/// graph belongs to the same project boundary as `requested`.
407pub fn assert_no_ancestor_graph(
408    requested: &Path,
409    allow_nested: bool,
410) -> Result<(), NestedIndexError> {
411    if allow_nested {
412        return Ok(());
413    }
414    // Canonicalise *before* delegating so a relative path like `.`
415    // resolves to an absolute path. If `canonicalize` fails (path does
416    // not exist yet), fall back to joining against the caller's cwd —
417    // this preserves the "act on the caller's directory, not the
418    // process's" intent for sub-process and test-harness invocations
419    // where `current.canonicalize()` would walk into an unrelated
420    // ancestor.
421    let canonical_requested = canonicalise_or_join_cwd(requested);
422    // Refuse to evaluate against a still-relative or empty path —
423    // discover_workspace_root would walk `current.join(".sqry/graph")`
424    // relative to the process cwd, producing the cluster-E §E.3
425    // spurious-error mode reported on 2026-05-10.
426    if !canonical_requested.is_absolute() || canonical_requested.as_os_str().is_empty() {
427        return Ok(());
428    }
429    if let WorkspaceRootDiscovery::GraphFound { root, boundary, .. } =
430        discover_workspace_root(&canonical_requested)
431        && canonical_requested != root
432        && canonical_requested.starts_with(&boundary)
433    {
434        return Err(NestedIndexError::AncestorExists {
435            requested: canonical_requested,
436            ancestor_graph: root.join(".sqry").join("graph"),
437            boundary,
438        });
439    }
440    Ok(())
441}
442
/// Canonicalise `path` even when it does not exist on disk yet.
///
/// The `assert_no_ancestor_graph` caller is by definition about to
/// create the directory, so a strict canonicalise would always fail in
/// the happy path. Resolution therefore proceeds in stages:
///
/// 1. If `path` canonicalises as-is, that result is returned.
/// 2. Otherwise a relative `path` is joined onto the process cwd. When
///    the cwd cannot be determined, `path` is returned unchanged.
/// 3. The deepest ancestor of the absolute path that *does*
///    canonicalise is resolved through the filesystem, and the
///    remaining, not-yet-existing tail is appended lexically (`.` is
///    dropped, `..` removes the previous component).
///
/// Step 3 matters because callers climb the returned path lexically: a
/// leftover `..` component or an unresolved symlink in the prefix would
/// make that climb visit directories that are not real ancestors of
/// the target.
fn canonicalise_or_join_cwd(path: &Path) -> PathBuf {
    if let Ok(canon) = path.canonicalize() {
        return canon;
    }

    let absolute = if path.is_absolute() {
        path.to_path_buf()
    } else {
        let Ok(cwd) = std::env::current_dir() else {
            return path.to_path_buf();
        };
        cwd.join(path)
    };

    // `ancestors()` starts with `absolute` itself, so a relative input
    // that exists once anchored at the cwd is canonicalised in full on
    // the first iteration.
    for ancestor in absolute.ancestors() {
        let Ok(mut resolved) = ancestor.canonicalize() else {
            continue;
        };
        let Ok(tail) = absolute.strip_prefix(ancestor) else {
            break;
        };
        // The tail names components that do not exist yet, so they
        // cannot be symlinks and lexical folding is exact.
        for component in tail.components() {
            match component {
                std::path::Component::Normal(segment) => resolved.push(segment),
                std::path::Component::ParentDir => {
                    resolved.pop();
                }
                std::path::Component::CurDir
                | std::path::Component::RootDir
                | std::path::Component::Prefix(_) => {}
            }
        }
        return resolved;
    }

    // No ancestor could be resolved at all; keep the best-effort path.
    absolute
}
461
462// ───────────────────────────────────────────────────────────────────────
463// `WorkspaceCleanReport` types (sqry-mcp flakiness P1, cluster E
464// foundation — `E_p1_cluster.md` §E.4 + Hand-off E4)
465// ───────────────────────────────────────────────────────────────────────
466//
467// Public types for `sqry workspace clean`'s `--json` output. Stable
468// across patch releases inside `schema_version 1`; additive changes
469// only. Cluster-E Layer-2 owns the discovery + serialization logic;
470// foundation only owns the type shape.
471
/// Top-level JSON shape produced by `sqry workspace clean --json`.
/// `schema_version 1` is the wire contract for this release; future
/// additive fields can be added without bumping the version, but a
/// breaking field rename or removal must increment to `schema_version 2`.
///
/// The struct derives `serde::Serialize` with no rename attributes, so
/// the serialized keys are the field names exactly as declared below —
/// renaming a field here is a wire-format change.
#[derive(Debug, Clone, serde::Serialize)]
pub struct WorkspaceCleanReport {
    /// Always `1` for this release.
    pub schema_version: u32,
    /// Canonicalised root path the cleanup walked.
    pub root: PathBuf,
    /// The `.sqry/graph` at the project boundary for `root`, if any.
    /// Populated by [`discover_workspace_root`]; never auto-deleted
    /// without `--force`.
    pub canonical_active_artifact: Option<PathBuf>,
    /// Daemon-locked artifacts surfaced via the `daemon/active-artifacts`
    /// IPC method. Empty when the daemon is unreachable; the JSON
    /// envelope additionally surfaces a fallback warning in that
    /// case (cluster-E Layer-2 owns the warning shape).
    pub daemon_locked_artifacts: Vec<PathBuf>,
    /// Every artifact the cleanup walk found, classified.
    pub discovered: Vec<DiscoveredArtifact>,
    /// Subset of `discovered` that the policy filter is willing to
    /// remove (post `is_canonical_active` / `is_daemon_locked` /
    /// `is_user_state` filtering).
    pub planned_removals: Vec<PathBuf>,
    /// Discovered but not planned for removal, with the reason.
    pub skipped: Vec<SkippedArtifact>,
    /// Mirrors the `--apply` flag.
    pub applied: bool,
    /// Empty when `applied = false`; otherwise the actually-removed
    /// paths from `planned_removals` (subset on per-entry I/O error).
    pub removed: Vec<PathBuf>,
    /// Per-entry removal failures during `--apply`.
    pub errors: Vec<RemovalError>,
}
507
/// A single artifact discovered by the cleanup walk, with the
/// classification + size + freshness data needed by the dry-run
/// summary.
///
/// Serialized with the field names exactly as declared (no serde
/// rename attributes are applied).
#[derive(Debug, Clone, serde::Serialize)]
pub struct DiscoveredArtifact {
    /// Canonicalised absolute path.
    pub path: PathBuf,
    /// Classification of this artifact (graph / cache / user state / ...).
    pub kind: ArtifactKind,
    /// Sum of all files within (capped at 10 MiB sample for cache
    /// directories with millions of entries; see `E_p1_cluster.md`
    /// §E.4 step 5c).
    pub size_bytes: u64,
    /// Last-modified time of the artifact root (best-effort; `None`
    /// when the filesystem does not expose mtime).
    // NOTE(review): serializing this field relies on chrono's serde
    // support being enabled for the crate — confirm in Cargo.toml.
    pub last_modified: Option<chrono::DateTime<chrono::Utc>>,
    /// `true` when this is the project's canonical
    /// `.sqry/graph` artifact — never auto-deleted.
    pub is_canonical_active: bool,
    /// `true` when the daemon currently has this artifact loaded
    /// (per the `daemon/active-artifacts` IPC method).
    pub is_daemon_locked: bool,
    /// `true` when this is `.sqry-index.user` — user-curated state
    /// hidden behind `--include-user-state`.
    pub is_user_state: bool,
}
534
/// Classification of a discovered artifact. The kind drives the
/// policy filter:
///
/// | Kind             | Default behaviour without flags         |
/// |------------------|-----------------------------------------|
/// | `Graph`          | Skipped if `is_canonical_active` (or `is_daemon_locked`) without `--force` |
/// | `GraphRoot`      | Same as `Graph`                          |
/// | `Cache`          | Removed                                  |
/// | `Prof`           | Removed                                  |
/// | `UserState`      | Skipped without `--include-user-state`   |
/// | `LegacyIndex`    | Removed                                  |
/// | `WorkspaceRegistry` | Always skipped (never auto-deleted)   |
/// | `NestedGraph`    | Removed unless `is_daemon_locked`        |
///
/// The table describes the intended policy; the filter itself is not
/// implemented in this module. Variants carry no serde rename
/// attributes, so each one serializes under its Rust identifier.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
pub enum ArtifactKind {
    /// `<root>/.sqry/graph` — canonical unified graph snapshot.
    Graph,
    /// `<root>/.sqry/` — parent of `Graph`. Listed separately so the
    /// dry-run can show "graph + cache + manifest" in one entry.
    GraphRoot,
    /// `<root>/.sqry-cache` — incremental indexer cache.
    Cache,
    /// `<root>/.sqry-prof` — profiler dumps (legacy / external).
    Prof,
    /// `<root>/.sqry-index.user` — user-curated state (aliases,
    /// recent queries). NEVER auto-deleted.
    UserState,
    /// `<root>/.sqry-index` — legacy v1 index marker file. Stale
    /// since v2.0.0; safe to delete unconditionally.
    LegacyIndex,
    /// `<root>/.sqry-workspace` — multi-repo registry. NEVER
    /// auto-deleted.
    WorkspaceRegistry,
    /// `.sqry/graph` discovered inside an outer project that already
    /// has its own canonical graph (E.3 nested-index pollution).
    NestedGraph,
}
572
/// Discovered but not planned for removal — `reason` carries the
/// policy verdict so the dry-run output can explain why.
///
/// Serialized with the field names `path` and `reason` as declared.
#[derive(Debug, Clone, serde::Serialize)]
pub struct SkippedArtifact {
    /// Canonicalised path of the skipped artifact.
    pub path: PathBuf,
    /// Why the policy filter skipped this entry.
    pub reason: SkipReason,
}
582
/// Why a discovered artifact was skipped from `planned_removals`.
///
/// Variants carry no serde rename attributes, so each one serializes
/// under its Rust identifier; renaming a variant changes the emitted
/// output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
pub enum SkipReason {
    /// Path equals the project's current canonical
    /// `.sqry/graph` artifact.
    CanonicalActive,
    /// Daemon currently has the artifact loaded.
    DaemonLocked,
    /// `.sqry-index.user` without `--include-user-state`.
    UserState,
    /// `.sqry-workspace` registry — never auto-deleted.
    WorkspaceRegistry,
    /// Symlink that the walker refused to follow.
    SymlinkRefused,
    /// Path canonicalised outside the cleanup `root`.
    OutsideRoot,
}
600
/// Per-entry removal failure (e.g. `EACCES` on `remove_dir_all`).
///
/// The error is stored as a plain `String` rather than an error value,
/// which keeps the struct `Clone` and directly serializable.
#[derive(Debug, Clone, serde::Serialize)]
pub struct RemovalError {
    /// Canonicalised path the cleanup tried to remove.
    pub path: PathBuf,
    /// `Display` form of the underlying I/O error.
    pub error: String,
}
609
// Filesystem-backed tests for the ancestor-walk discovery
// (`discover_workspace_root`) and the nested-index guard
// (`assert_no_ancestor_graph`). Every test builds its layout inside a
// fresh `TempDir`, and paths are canonicalised before comparison because
// discovery canonicalises its starting point.
#[cfg(test)]
mod ancestor_tests {
    use super::*;
    use tempfile::TempDir;

    /// Sanity: discovery returns `None` for a deeply-nested empty
    /// hierarchy (no markers, no graphs).
    #[test]
    fn discover_returns_none_for_empty_hierarchy() {
        let tmp = TempDir::new().unwrap();
        let leaf = tmp.path().join("a/b/c");
        std::fs::create_dir_all(&leaf).unwrap();
        let outcome = discover_workspace_root(&leaf);
        // Either None (filesystem root above tmp has no marker) or
        // BoundaryOnly (the filesystem root happens to host a
        // marker like `.git`). Both are acceptable here; the test
        // pins that GraphFound is NOT returned without a graph.
        // NOTE(review): this still assumes no ancestor of the temp
        // directory contains `.sqry/graph` or `.sqry-index`.
        assert!(
            !matches!(outcome, WorkspaceRootDiscovery::GraphFound { .. }),
            "no .sqry/graph above leaf, expected None or BoundaryOnly, got {outcome:?}"
        );
    }

    /// `Cargo.toml` at the project root halts the walk and the
    /// outcome is `BoundaryOnly` when no graph exists.
    #[test]
    fn discover_stops_at_cargo_toml_marker_with_no_graph() {
        let tmp = TempDir::new().unwrap();
        let proj = tmp.path().join("proj");
        let sub = proj.join("sub/deep");
        std::fs::create_dir_all(&sub).unwrap();
        std::fs::write(proj.join("Cargo.toml"), "[package]\n").unwrap();
        let outcome = discover_workspace_root(&sub);
        match outcome {
            WorkspaceRootDiscovery::BoundaryOnly { boundary, .. } => {
                assert_eq!(
                    boundary.canonicalize().unwrap(),
                    proj.canonicalize().unwrap(),
                    "boundary must equal proj root"
                );
            }
            other => panic!("expected BoundaryOnly, got {other:?}"),
        }
    }

    /// `.sqry/graph` inside the project boundary returns
    /// `GraphFound` with both fields populated.
    #[test]
    fn discover_returns_graph_found_when_graph_inside_boundary() {
        let tmp = TempDir::new().unwrap();
        let proj = tmp.path().join("proj");
        let sub = proj.join("sub");
        std::fs::create_dir_all(&sub).unwrap();
        std::fs::write(proj.join("Cargo.toml"), "[package]\n").unwrap();
        std::fs::create_dir_all(proj.join(".sqry").join("graph")).unwrap();
        let outcome = discover_workspace_root(&sub);
        match outcome {
            WorkspaceRootDiscovery::GraphFound { root, boundary, .. } => {
                assert_eq!(root.canonicalize().unwrap(), proj.canonicalize().unwrap());
                assert_eq!(
                    boundary.canonicalize().unwrap(),
                    proj.canonicalize().unwrap()
                );
            }
            other => panic!("expected GraphFound, got {other:?}"),
        }
    }

    /// Stray `~/.sqry/graph` outside the project boundary does NOT
    /// satisfy `GraphFound` — the project marker wins. (The exact
    /// reproducer from `E_p1_cluster.md` §E.1 "stray ~/.sqry/graph".)
    #[test]
    fn discover_discards_outer_graph_when_inner_marker_exists() {
        let tmp = TempDir::new().unwrap();
        let outer = tmp.path();
        std::fs::create_dir_all(outer.join(".sqry").join("graph")).unwrap();
        let proj = outer.join("work/new-project");
        std::fs::create_dir_all(&proj).unwrap();
        std::fs::write(proj.join("Cargo.toml"), "[package]\n").unwrap();
        // The walk starts at the marker directory itself, so it stops
        // before ever reaching the outer graph.
        let outcome = discover_workspace_root(&proj);
        match outcome {
            WorkspaceRootDiscovery::BoundaryOnly { boundary, .. } => {
                assert_eq!(
                    boundary.canonicalize().unwrap(),
                    proj.canonicalize().unwrap(),
                    "boundary should be the inner project root, not the outer stray graph"
                );
            }
            other => {
                panic!("outer-graph + inner-marker must collapse to BoundaryOnly, got {other:?}")
            }
        }
    }

    /// `assert_no_ancestor_graph(requested, false)` rejects nested
    /// `.sqry/graph` creation when the same project already has one.
    #[test]
    fn assert_no_ancestor_graph_rejects_nested_creation() {
        let tmp = TempDir::new().unwrap();
        let proj = tmp.path().join("proj");
        std::fs::create_dir_all(proj.join(".sqry").join("graph")).unwrap();
        std::fs::write(proj.join("Cargo.toml"), "[package]\n").unwrap();
        let nested = proj.join("sub");
        std::fs::create_dir_all(&nested).unwrap();
        let err = assert_no_ancestor_graph(&nested, false)
            .expect_err("nested creation must error when ancestor graph exists");
        assert!(matches!(err, NestedIndexError::AncestorExists { .. }));
    }

    /// `allow_nested = true` bypasses the guard.
    #[test]
    fn assert_no_ancestor_graph_passes_with_allow_nested() {
        let tmp = TempDir::new().unwrap();
        let proj = tmp.path().join("proj");
        std::fs::create_dir_all(proj.join(".sqry").join("graph")).unwrap();
        std::fs::write(proj.join("Cargo.toml"), "[package]\n").unwrap();
        let nested = proj.join("sub");
        std::fs::create_dir_all(&nested).unwrap();
        assert!(assert_no_ancestor_graph(&nested, true).is_ok());
    }
}