use std::fs;
use std::path::{Path, PathBuf};
use ignore::WalkBuilder;
use thiserror::Error;
use super::error::{WorkspaceError, WorkspaceResult};
use super::registry::{WorkspaceRepoId, WorkspaceRepository};
use crate::config::buffers::max_repositories;
use crate::project::path_utils::is_ignored_dir;
/// File name that `discover_repositories` looks for while walking a workspace.
const MANIFEST_FILE_NAME: &str = "manifest.json";
/// Name the manifest's parent directory must have for the manifest to count.
const GRAPH_DIR_SEGMENT: &str = "graph";
/// Name the graph directory's parent must have; its own parent is treated as
/// the repository root.
const SQRY_DIR_SEGMENT: &str = ".sqry";
/// Selects which manifest-bearing directories `discover_repositories` accepts.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiscoveryMode {
/// Accept every directory that contains `.sqry/graph/manifest.json`.
IndexFiles,
/// Additionally require a `.git` directory directly inside the repository
/// root; candidates without one are skipped.
GitRoots,
}
/// Walks `root` and collects every repository that owns a
/// `.sqry/graph/manifest.json` index manifest.
///
/// The walk visits hidden entries (so `.sqry` is reachable), ignores `.ignore`
/// files, honours `.gitignore` / git exclude rules (including those of parent
/// directories), and prunes every entry for which `is_ignored_dir` returns
/// `true`.
///
/// For each manifest found, the directory two levels above the manifest's
/// parent (`<repo>/.sqry/graph/manifest.json` -> `<repo>`) becomes the
/// repository root. In [`DiscoveryMode::GitRoots`] the root must also contain
/// a `.git` directory. The repository id is built from the root's path
/// relative to `root` (or the full path when it is not below `root`), and the
/// manifest's modification time, when readable, is recorded as the last
/// indexing time.
///
/// The result is sorted by repository id and de-duplicated on id.
///
/// # Errors
///
/// * `WorkspaceError::Discovery` as soon as the directory walker reports an
///   error; the walk is aborted.
/// * `WorkspaceError::TooManyRepositories` when another qualifying manifest is
///   found after `max_repositories()` repositories were already collected.
///   `found` carries the number collected so far.
pub fn discover_repositories(
    root: &Path,
    mode: DiscoveryMode,
) -> WorkspaceResult<Vec<WorkspaceRepository>> {
    // The cap does not change during the walk, so read it once up front
    // instead of once per manifest.
    let max_repos = max_repositories();
    let mut repositories = Vec::new();

    let walker = WalkBuilder::new(root)
        .hidden(false)
        .ignore(false)
        .git_ignore(true)
        .git_exclude(true)
        .parents(true)
        .filter_entry(|entry| !is_ignored_dir(entry.file_name()))
        .build();

    for result in walker {
        let entry = match result {
            Ok(ok) => ok,
            Err(err) => {
                // Keep the original I/O error when there is one; otherwise
                // wrap the walker's message so the caller still sees it.
                let message = err.to_string();
                let io_err = err
                    .into_io_error()
                    .unwrap_or_else(|| std::io::Error::other(message));
                return Err(WorkspaceError::Discovery {
                    root: root.to_path_buf(),
                    source: io_err,
                });
            }
        };

        // Only files named `manifest.json` are candidates.
        if entry.file_type().is_some_and(|ft| ft.is_dir()) {
            continue;
        }
        if entry.file_name() != MANIFEST_FILE_NAME {
            continue;
        }

        // Require the exact layout `<repo>/.sqry/graph/manifest.json`.
        let manifest_path = entry.into_path();
        let Some(graph_dir) = manifest_path.parent() else {
            continue;
        };
        if graph_dir.file_name().and_then(|s| s.to_str()) != Some(GRAPH_DIR_SEGMENT) {
            continue;
        }
        let Some(sqry_dir) = graph_dir.parent() else {
            continue;
        };
        if sqry_dir.file_name().and_then(|s| s.to_str()) != Some(SQRY_DIR_SEGMENT) {
            continue;
        }
        let Some(repo_root) = sqry_dir.parent().map(Path::to_path_buf) else {
            continue;
        };

        if matches!(mode, DiscoveryMode::GitRoots) && !repo_root.join(".git").is_dir() {
            continue;
        }

        // Enforce the cap before spending a stat call on an entry that would
        // be rejected anyway.
        if repositories.len() >= max_repos {
            return Err(WorkspaceError::TooManyRepositories {
                found: repositories.len(),
                limit: max_repos,
            });
        }

        let relative_path = repo_root.strip_prefix(root).unwrap_or(repo_root.as_path());
        let repo_id = WorkspaceRepoId::new(relative_path);
        // Fall back to the id when the root has no final component
        // (e.g. a filesystem root).
        let name = repo_root.file_name().map_or_else(
            || repo_id.as_str().to_string(),
            |os| os.to_string_lossy().into_owned(),
        );
        // Best effort: an unreadable manifest simply has no timestamp.
        let last_indexed_at = fs::metadata(&manifest_path)
            .ok()
            .and_then(|meta| meta.modified().ok());

        repositories.push(WorkspaceRepository::new(
            repo_id,
            name,
            repo_root,
            manifest_path,
            last_indexed_at,
        ));
    }

    repositories.sort_by(|a, b| a.id.cmp(&b.id));
    repositories.dedup_by(|a, b| a.id == b.id);
    Ok(repositories)
}
/// Maximum number of directory levels (the starting directory plus its
/// ancestors) that `discover_workspace_root` inspects before giving up.
pub const MAX_ANCESTOR_DEPTH: usize = 64;
/// Entry names whose presence in a directory makes `discover_workspace_root`
/// treat that directory as the project boundary and stop walking upwards.
/// Only existence is checked, not whether the entry is a file or a directory.
pub const PROJECT_MARKERS: &[&str] = &[
".git",
"Cargo.toml",
"package.json",
"pyproject.toml",
"go.mod",
];
/// Outcome of the upward search performed by `discover_workspace_root`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WorkspaceRootDiscovery {
/// An index (`.sqry/graph` directory or `.sqry-index` entry) was found.
GraphFound {
/// Directory that contains the index.
root: PathBuf,
/// First directory on the walk that holds a project marker, or the same
/// path as `root` when no marker was encountered.
boundary: PathBuf,
/// Number of levels between the starting directory and `root`
/// (0 means the starting directory itself).
depth: usize,
/// `true` when the start path resolved to a regular file, in which case
/// the walk began at that file's parent directory.
is_file_scope: bool,
},
/// A project marker was found but no index was seen on the way up to it.
BoundaryOnly {
/// Directory that holds the project marker.
boundary: PathBuf,
/// `true` when the start path resolved to a regular file.
is_file_scope: bool,
},
/// Neither an index nor a project marker was found within the walked levels.
None,
}
/// Searches upwards from `start` for an existing index and for the enclosing
/// project boundary.
///
/// `start` is canonicalised when possible (otherwise used as given). If it
/// resolves to a regular file, the search begins at the file's parent
/// directory. At most [`MAX_ANCESTOR_DEPTH`] levels are inspected, nearest
/// first:
///
/// * the first level containing a `.sqry/graph` directory or a `.sqry-index`
///   entry is remembered as the index root;
/// * the first level containing any of [`PROJECT_MARKERS`] becomes the
///   boundary and ends the walk, so an index that lives above the boundary is
///   never observed.
///
/// Returns [`WorkspaceRootDiscovery::GraphFound`] when an index was seen (the
/// boundary falls back to the index root if no marker was found),
/// [`WorkspaceRootDiscovery::BoundaryOnly`] when only a marker was seen, and
/// [`WorkspaceRootDiscovery::None`] otherwise.
#[must_use]
pub fn discover_workspace_root(start: &Path) -> WorkspaceRootDiscovery {
    let resolved = start.canonicalize().unwrap_or_else(|_| start.to_path_buf());
    let is_file_scope = resolved.is_file();

    // A file has no children to probe; start from the directory holding it.
    let mut cursor: PathBuf = if is_file_scope {
        match resolved.parent() {
            Some(parent) => parent.to_path_buf(),
            None => resolved.clone(),
        }
    } else {
        resolved
    };

    let mut graph_hit: Option<(PathBuf, usize)> = None;
    let mut boundary_hit: Option<PathBuf> = None;

    for level in 0..MAX_ANCESTOR_DEPTH {
        // Only the nearest index counts; once found, stop probing for one.
        if graph_hit.is_none() {
            let has_index = cursor.join(".sqry").join("graph").is_dir()
                || cursor.join(".sqry-index").exists();
            if has_index {
                graph_hit = Some((cursor.clone(), level));
            }
        }

        let at_marker = PROJECT_MARKERS
            .iter()
            .any(|marker| cursor.join(marker).exists());
        if at_marker {
            boundary_hit = Some(cursor.clone());
            break;
        }

        // `pop` returns false once there is no parent left to move to.
        if !cursor.pop() {
            break;
        }
    }

    let Some(boundary) = boundary_hit else {
        // No marker anywhere on the walk: an index, if any, is its own boundary.
        return match graph_hit {
            Some((root, depth)) => WorkspaceRootDiscovery::GraphFound {
                boundary: root.clone(),
                root,
                depth,
                is_file_scope,
            },
            None => WorkspaceRootDiscovery::None,
        };
    };

    match graph_hit {
        // The index only counts when it sits at or below the boundary.
        Some((root, depth)) if root.starts_with(&boundary) => {
            WorkspaceRootDiscovery::GraphFound {
                root,
                boundary,
                depth,
                is_file_scope,
            }
        }
        _ => WorkspaceRootDiscovery::BoundaryOnly {
            boundary,
            is_file_scope,
        },
    }
}
/// Error returned by `assert_no_ancestor_graph` when creating an index at the
/// requested location would nest it inside an already-indexed tree.
#[derive(Debug, Clone, Error)]
pub enum NestedIndexError {
/// An index already exists in an ancestor of the requested location. The
/// rendered message names the paths involved and the ways to proceed.
#[error(
"refusing to create a nested .sqry/ index.\n\
An ancestor index already exists at: {ancestor_graph}\n\
Requested location: {requested}\n\
Project boundary detected at: {boundary}\n\
\n\
If this is intentional (e.g. a sub-project with its own graph), \
re-run with --allow-nested.\n\
Otherwise: cd to the project root ({boundary}) and run \
`sqry update` (incremental) or `sqry index --force` (rebuild).",
ancestor_graph = ancestor_graph.display(),
requested = requested.display(),
boundary = boundary.display(),
)]
AncestorExists {
/// Resolved location at which the new index was requested.
requested: PathBuf,
/// Path reported as the location of the existing ancestor index.
ancestor_graph: PathBuf,
/// Project boundary reported by the workspace-root discovery.
boundary: PathBuf,
},
}
/// Guards against creating a new index inside a tree that is already covered
/// by an index in an ancestor directory.
///
/// Returns `Ok(())` without touching the filesystem when `allow_nested` is
/// `true`. Otherwise `requested` is resolved to an absolute path and
/// `discover_workspace_root` is run from it. The call is rejected only when an
/// index is found, the requested location is not the index root itself, and
/// the requested location lies inside the detected project boundary.
///
/// If `requested` cannot be resolved to an absolute path the check is skipped
/// and `Ok(())` is returned.
///
/// # Errors
///
/// [`NestedIndexError::AncestorExists`] when an ancestor index covers
/// `requested`. `ancestor_graph` names the artifact that actually exists:
/// `<root>/.sqry/graph` when that directory is present, otherwise the legacy
/// `<root>/.sqry-index`.
pub fn assert_no_ancestor_graph(
    requested: &Path,
    allow_nested: bool,
) -> Result<(), NestedIndexError> {
    if allow_nested {
        return Ok(());
    }

    let canonical_requested = canonicalise_or_join_cwd(requested);
    // Without an absolute anchor the ancestor walk would be relative to
    // whatever the path happens to mean, so do not block on it.
    if !canonical_requested.is_absolute() || canonical_requested.as_os_str().is_empty() {
        return Ok(());
    }

    let WorkspaceRootDiscovery::GraphFound { root, boundary, .. } =
        discover_workspace_root(&canonical_requested)
    else {
        return Ok(());
    };

    // Re-indexing the existing root is fine, and a location outside the
    // boundary is not considered covered by that index.
    if canonical_requested == root || !canonical_requested.starts_with(&boundary) {
        return Ok(());
    }

    // Discovery matches on either the graph directory or the legacy index, so
    // report whichever one is really there instead of assuming the former.
    let graph_dir = root.join(".sqry").join("graph");
    let ancestor_graph = if graph_dir.is_dir() {
        graph_dir
    } else {
        root.join(".sqry-index")
    };

    Err(NestedIndexError::AncestorExists {
        requested: canonical_requested,
        ancestor_graph,
        boundary,
    })
}
/// Resolves `path` to the most absolute form available without failing.
///
/// Order of preference:
/// 1. the canonical form of `path`, when canonicalisation succeeds;
/// 2. `path` unchanged, when it is already absolute;
/// 3. the current directory joined with `path` (canonicalised if possible);
/// 4. `path` unchanged, when the current directory cannot be determined.
fn canonicalise_or_join_cwd(path: &Path) -> PathBuf {
    match path.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) if path.is_absolute() => path.to_path_buf(),
        Err(_) => std::env::current_dir().map_or_else(
            |_| path.to_path_buf(),
            |cwd| {
                let anchored = cwd.join(path);
                anchored.canonicalize().unwrap_or(anchored)
            },
        ),
    }
}
/// Serializable report type for a workspace clean operation.
///
/// NOTE(review): the code that fills this report is not part of this section;
/// the field notes below are read off the names and types only and should be
/// confirmed against the producer.
#[derive(Debug, Clone, serde::Serialize)]
pub struct WorkspaceCleanReport {
/// Version number of the report format.
pub schema_version: u32,
/// Presumably the workspace root the report is about.
pub root: PathBuf,
/// Presumably the artifact currently treated as the active one, if any.
pub canonical_active_artifact: Option<PathBuf>,
/// Presumably artifacts held by a running daemon.
pub daemon_locked_artifacts: Vec<PathBuf>,
/// Artifacts recorded as discovered, with per-artifact details.
pub discovered: Vec<DiscoveredArtifact>,
/// Paths recorded as planned for removal.
pub planned_removals: Vec<PathBuf>,
/// Artifacts recorded as skipped, each with a reason.
pub skipped: Vec<SkippedArtifact>,
/// Presumably `true` when removals were carried out rather than only planned.
pub applied: bool,
/// Paths recorded as removed.
pub removed: Vec<PathBuf>,
/// Per-path removal failures.
pub errors: Vec<RemovalError>,
}
/// Serializable description of one artifact listed in a
/// [`WorkspaceCleanReport`].
///
/// NOTE(review): how these fields are computed is not visible in this
/// section; the notes below follow the names and types only.
#[derive(Debug, Clone, serde::Serialize)]
pub struct DiscoveredArtifact {
/// Location of the artifact.
pub path: PathBuf,
/// Classification of the artifact.
pub kind: ArtifactKind,
/// Size in bytes (presumably the total on-disk size; confirm for directories).
pub size_bytes: u64,
/// Last-modified time in UTC, when available.
pub last_modified: Option<chrono::DateTime<chrono::Utc>>,
/// Flag: artifact is the canonical active one.
pub is_canonical_active: bool,
/// Flag: artifact is locked by a daemon.
pub is_daemon_locked: bool,
/// Flag: artifact is user state.
pub is_user_state: bool,
}
/// Classification assigned to a [`DiscoveredArtifact`].
///
/// NOTE(review): the mapping from on-disk paths to these variants is not
/// visible in this section; confirm the exact meaning of each variant against
/// the classifier.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
pub enum ArtifactKind {
Graph,
GraphRoot,
Cache,
Prof,
UserState,
LegacyIndex,
WorkspaceRegistry,
NestedGraph,
}
/// Serializable record pairing an artifact path with the reason it was
/// skipped; used in [`WorkspaceCleanReport::skipped`].
#[derive(Debug, Clone, serde::Serialize)]
pub struct SkippedArtifact {
/// Location of the skipped artifact.
pub path: PathBuf,
/// Why the artifact was skipped.
pub reason: SkipReason,
}
/// Reason attached to a [`SkippedArtifact`].
///
/// NOTE(review): the conditions that select each variant are not visible in
/// this section; confirm them against the code that builds the report.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
pub enum SkipReason {
CanonicalActive,
DaemonLocked,
UserState,
WorkspaceRegistry,
SymlinkRefused,
OutsideRoot,
}
/// Serializable record of a failed removal; used in
/// [`WorkspaceCleanReport::errors`].
#[derive(Debug, Clone, serde::Serialize)]
pub struct RemovalError {
/// Path whose removal failed.
pub path: PathBuf,
/// Error text for the failure.
pub error: String,
}
#[cfg(test)]
mod ancestor_tests {
    use super::*;
    use tempfile::TempDir;

    /// Writes a minimal `Cargo.toml` into `dir`, which must already exist.
    fn write_cargo_marker(dir: &Path) {
        std::fs::write(dir.join("Cargo.toml"), "[package]\n").unwrap();
    }

    /// Creates `dir/.sqry/graph`, including any missing parent directories.
    fn make_graph_dir(dir: &Path) {
        std::fs::create_dir_all(dir.join(".sqry").join("graph")).unwrap();
    }

    /// Canonicalises `path`, panicking when that is not possible.
    fn canon(path: &Path) -> PathBuf {
        path.canonicalize().unwrap()
    }

    /// A bare directory chain must never be reported as having a graph.
    #[test]
    fn discover_returns_none_for_empty_hierarchy() {
        let scratch = TempDir::new().unwrap();
        let leaf = scratch.path().join("a/b/c");
        std::fs::create_dir_all(&leaf).unwrap();

        let outcome = discover_workspace_root(&leaf);
        let found_graph = matches!(outcome, WorkspaceRootDiscovery::GraphFound { .. });
        assert!(
            !found_graph,
            "no .sqry/graph above leaf, expected None or BoundaryOnly, got {outcome:?}"
        );
    }

    /// A `Cargo.toml` without any graph yields `BoundaryOnly` at that directory.
    #[test]
    fn discover_stops_at_cargo_toml_marker_with_no_graph() {
        let scratch = TempDir::new().unwrap();
        let proj = scratch.path().join("proj");
        let sub = proj.join("sub/deep");
        std::fs::create_dir_all(&sub).unwrap();
        write_cargo_marker(&proj);

        let outcome = discover_workspace_root(&sub);
        match &outcome {
            WorkspaceRootDiscovery::BoundaryOnly { boundary, .. } => {
                assert_eq!(canon(boundary), canon(&proj), "boundary must equal proj root");
            }
            _ => panic!("expected BoundaryOnly, got {outcome:?}"),
        }
    }

    /// Graph and marker in the same directory: both root and boundary point there.
    #[test]
    fn discover_returns_graph_found_when_graph_inside_boundary() {
        let scratch = TempDir::new().unwrap();
        let proj = scratch.path().join("proj");
        let sub = proj.join("sub");
        std::fs::create_dir_all(&sub).unwrap();
        write_cargo_marker(&proj);
        make_graph_dir(&proj);

        let outcome = discover_workspace_root(&sub);
        match &outcome {
            WorkspaceRootDiscovery::GraphFound { root, boundary, .. } => {
                let expected = canon(&proj);
                assert_eq!(canon(root), expected);
                assert_eq!(canon(boundary), expected);
            }
            _ => panic!("expected GraphFound, got {outcome:?}"),
        }
    }

    /// A graph above the nearest marker must not leak into the result.
    #[test]
    fn discover_discards_outer_graph_when_inner_marker_exists() {
        let scratch = TempDir::new().unwrap();
        let outer = scratch.path();
        make_graph_dir(outer);
        let proj = outer.join("work/new-project");
        std::fs::create_dir_all(&proj).unwrap();
        write_cargo_marker(&proj);

        let outcome = discover_workspace_root(&proj);
        match &outcome {
            WorkspaceRootDiscovery::BoundaryOnly { boundary, .. } => {
                assert_eq!(
                    canon(boundary),
                    canon(&proj),
                    "boundary should be the inner project root, not the outer stray graph"
                );
            }
            _ => {
                panic!("outer-graph + inner-marker must collapse to BoundaryOnly, got {outcome:?}")
            }
        }
    }

    /// Requesting an index in a sub-directory of an indexed project is refused.
    #[test]
    fn assert_no_ancestor_graph_rejects_nested_creation() {
        let scratch = TempDir::new().unwrap();
        let proj = scratch.path().join("proj");
        make_graph_dir(&proj);
        write_cargo_marker(&proj);
        let nested = proj.join("sub");
        std::fs::create_dir_all(&nested).unwrap();

        let verdict = assert_no_ancestor_graph(&nested, false);
        let err = verdict.expect_err("nested creation must error when ancestor graph exists");
        assert!(matches!(err, NestedIndexError::AncestorExists { .. }));
    }

    /// The same layout is accepted once nesting is explicitly allowed.
    #[test]
    fn assert_no_ancestor_graph_passes_with_allow_nested() {
        let scratch = TempDir::new().unwrap();
        let proj = scratch.path().join("proj");
        make_graph_dir(&proj);
        write_cargo_marker(&proj);
        let nested = proj.join("sub");
        std::fs::create_dir_all(&nested).unwrap();

        let verdict = assert_no_ancestor_graph(&nested, true);
        assert!(verdict.is_ok());
    }
}