Skip to main content

kdo_graph/
discover.rs

1//! Workspace discovery and graph construction.
2
3use crate::hash::content_hash_dir;
4use indexmap::IndexMap;
5use kdo_core::{DepKind, Dependency, KdoError, Project};
6use kdo_resolver::{manifest_filenames, parse_manifest};
7use petgraph::algo::is_cyclic_directed;
8use petgraph::graph::{DiGraph, NodeIndex};
9use petgraph::visit::Bfs;
10use petgraph::Direction;
11use rayon::prelude::*;
12use serde::{Deserialize, Serialize};
13use std::path::{Path, PathBuf};
14use tracing::{debug, info, warn};
15
16/// The workspace dependency graph.
17///
18/// Wraps a `petgraph::DiGraph` where nodes are [`Project`]s and edges are [`DepKind`]s.
19#[derive(Debug)]
20pub struct WorkspaceGraph {
21    /// Root directory of the workspace.
22    pub root: PathBuf,
23    /// Directed graph of projects.
24    pub graph: DiGraph<Project, DepKind>,
25    /// Map from project name to node index.
26    name_to_idx: IndexMap<String, NodeIndex>,
27}
28
29/// Serializable project summary for JSON output.
30#[derive(Debug, Serialize, Deserialize)]
31pub struct ProjectSummary {
32    /// Project name.
33    pub name: String,
34    /// Detected language.
35    pub language: String,
36    /// Context summary if available.
37    pub summary: Option<String>,
38    /// Number of direct dependencies.
39    pub dep_count: usize,
40}
41
42/// Serializable graph output.
43#[derive(Debug, Serialize, Deserialize)]
44pub struct GraphOutput {
45    /// All projects in the workspace.
46    pub projects: Vec<ProjectSummary>,
47    /// Edges as (source_name, target_name, kind).
48    pub edges: Vec<(String, String, String)>,
49}
50
51impl WorkspaceGraph {
52    /// Discover all projects under `root` by walking the filesystem.
53    ///
54    /// Uses the `ignore` crate to respect `.gitignore` and `.kdoignore`.
55    /// Parses manifests in parallel with rayon.
56    pub fn discover(root: &Path) -> Result<Self, KdoError> {
57        let root = root
58            .canonicalize()
59            .map_err(|_| KdoError::ManifestNotFound(root.to_path_buf()))?;
60
61        info!(root = %root.display(), "discovering workspace");
62
63        let manifest_names = manifest_filenames();
64        let walker = ignore::WalkBuilder::new(&root)
65            .hidden(true)
66            .git_ignore(true)
67            .add_custom_ignore_filename(".kdoignore")
68            .filter_entry(|entry| {
69                let name = entry.file_name().to_string_lossy();
70                // Skip node_modules, target, .git, etc.
71                !matches!(
72                    name.as_ref(),
73                    "node_modules" | "target" | ".git" | ".kdo" | "dist" | "build" | "__pycache__"
74                )
75            })
76            .build();
77
78        // Collect manifest paths
79        let manifest_paths: Vec<PathBuf> = walker
80            .filter_map(|entry| entry.ok())
81            .filter(|entry| {
82                entry.file_type().map(|ft| ft.is_file()).unwrap_or(false)
83                    && entry
84                        .file_name()
85                        .to_str()
86                        .map(|name| manifest_names.contains(&name))
87                        .unwrap_or(false)
88            })
89            .map(|entry| entry.into_path())
90            .collect();
91
92        // Apply kdo.toml workspace.projects / workspace.exclude globs if present.
93        let manifest_paths = apply_workspace_globs(&root, manifest_paths);
94
95        debug!(count = manifest_paths.len(), "found manifest files");
96
97        // Filter out manifests in directories that have a more specific manifest.
98        // E.g., if a dir has both Anchor.toml and Cargo.toml, Anchor takes priority.
99        let filtered = filter_manifests(&manifest_paths);
100
101        // Parse manifests in parallel
102        let results: Vec<Result<(Project, Vec<Dependency>), KdoError>> = filtered
103            .par_iter()
104            .map(|path| parse_manifest(path, &root))
105            .collect();
106
107        let mut graph = DiGraph::new();
108        let mut name_to_idx = IndexMap::new();
109        let mut all_deps: Vec<(String, Vec<Dependency>)> = Vec::new();
110
111        for result in results {
112            match result {
113                Ok((mut project, deps)) => {
114                    // Compute content hash
115                    project.content_hash = content_hash_dir(&project.path);
116
117                    let name = project.name.clone();
118                    let idx = graph.add_node(project);
119                    name_to_idx.insert(name.clone(), idx);
120                    all_deps.push((name, deps));
121                }
122                Err(KdoError::ManifestNotFound(_)) => {
123                    // Skip workspace-root manifests
124                    continue;
125                }
126                Err(e) => {
127                    warn!(error = %e, "skipping unparseable manifest");
128                }
129            }
130        }
131
132        // Wire up edges for local/workspace dependencies
133        for (source_name, deps) in &all_deps {
134            if let Some(&source_idx) = name_to_idx.get(source_name) {
135                for dep in deps {
136                    if let Some(&target_idx) = name_to_idx.get(&dep.name) {
137                        graph.add_edge(source_idx, target_idx, dep.kind.clone());
138                    }
139                }
140            }
141        }
142
143        info!(
144            projects = name_to_idx.len(),
145            edges = graph.edge_count(),
146            "workspace graph built"
147        );
148
149        Ok(Self {
150            root,
151            graph,
152            name_to_idx,
153        })
154    }
155
156    /// Get a project by name.
157    pub fn get_project(&self, name: &str) -> Result<&Project, KdoError> {
158        let idx = self
159            .name_to_idx
160            .get(name)
161            .ok_or_else(|| KdoError::ProjectNotFound(name.to_string()))?;
162        Ok(&self.graph[*idx])
163    }
164
165    /// Get all projects.
166    pub fn projects(&self) -> Vec<&Project> {
167        self.graph.node_weights().collect()
168    }
169
170    /// Dependency closure: all transitive dependencies of a project (DFS on outgoing edges).
171    pub fn dependency_closure(&self, name: &str) -> Result<Vec<&Project>, KdoError> {
172        let start = self
173            .name_to_idx
174            .get(name)
175            .ok_or_else(|| KdoError::ProjectNotFound(name.to_string()))?;
176
177        let mut visited = Vec::new();
178        let mut stack = vec![*start];
179        let mut seen = std::collections::HashSet::new();
180        seen.insert(*start);
181
182        while let Some(node) = stack.pop() {
183            // Skip the start node itself
184            if node != *start {
185                visited.push(&self.graph[node]);
186            }
187            for neighbor in self.graph.neighbors_directed(node, Direction::Outgoing) {
188                if seen.insert(neighbor) {
189                    stack.push(neighbor);
190                }
191            }
192        }
193
194        Ok(visited)
195    }
196
197    /// Affected set: all projects that transitively depend on the given project.
198    /// Uses BFS on the reversed graph (incoming edges).
199    pub fn affected_set(&self, name: &str) -> Result<Vec<&Project>, KdoError> {
200        let start = self
201            .name_to_idx
202            .get(name)
203            .ok_or_else(|| KdoError::ProjectNotFound(name.to_string()))?;
204
205        // BFS on reversed edges
206        let reversed = petgraph::visit::Reversed(&self.graph);
207        let mut bfs = Bfs::new(&reversed, *start);
208        let mut affected = Vec::new();
209
210        while let Some(node) = bfs.next(&reversed) {
211            if node != *start {
212                affected.push(&self.graph[node]);
213            }
214        }
215
216        Ok(affected)
217    }
218
219    /// Detect cycles in the dependency graph.
220    pub fn detect_cycles(&self) -> Result<(), KdoError> {
221        if is_cyclic_directed(&self.graph) {
222            // Find a cycle for the error message
223            let cycle_desc = find_cycle_description(&self.graph);
224            return Err(KdoError::CircularDependency(cycle_desc));
225        }
226        Ok(())
227    }
228
229    /// Get a serializable summary of all projects.
230    pub fn project_summaries(&self) -> Vec<ProjectSummary> {
231        self.graph
232            .node_indices()
233            .map(|idx| {
234                let project = &self.graph[idx];
235                let dep_count = self
236                    .graph
237                    .neighbors_directed(idx, Direction::Outgoing)
238                    .count();
239                ProjectSummary {
240                    name: project.name.clone(),
241                    language: project.language.to_string(),
242                    summary: project.context_summary.clone(),
243                    dep_count,
244                }
245            })
246            .collect()
247    }
248
249    /// Get the full graph as a serializable structure.
250    pub fn to_graph_output(&self) -> GraphOutput {
251        let projects = self.project_summaries();
252        let edges = self
253            .graph
254            .edge_indices()
255            .filter_map(|edge_idx| {
256                let (source, target) = self.graph.edge_endpoints(edge_idx)?;
257                let kind = &self.graph[edge_idx];
258                Some((
259                    self.graph[source].name.clone(),
260                    self.graph[target].name.clone(),
261                    kind.to_string(),
262                ))
263            })
264            .collect();
265
266        GraphOutput { projects, edges }
267    }
268
269    /// Generate a DOT representation of the graph.
270    pub fn to_dot(&self) -> String {
271        let mut dot = String::from("digraph workspace {\n  rankdir=LR;\n");
272        for idx in self.graph.node_indices() {
273            let project = &self.graph[idx];
274            dot.push_str(&format!(
275                "  \"{}\" [label=\"{}\\n({})\"];\n",
276                project.name, project.name, project.language
277            ));
278        }
279        for edge_idx in self.graph.edge_indices() {
280            if let Some((source, target)) = self.graph.edge_endpoints(edge_idx) {
281                let kind = &self.graph[edge_idx];
282                dot.push_str(&format!(
283                    "  \"{}\" -> \"{}\" [label=\"{}\"];\n",
284                    self.graph[source].name, self.graph[target].name, kind
285                ));
286            }
287        }
288        dot.push_str("}\n");
289        dot
290    }
291
292    /// Generate a text representation of the graph.
293    pub fn to_text(&self) -> String {
294        let mut out = String::new();
295        for idx in self.graph.node_indices() {
296            let project = &self.graph[idx];
297            out.push_str(&format!("{} ({})\n", project.name, project.language));
298            for neighbor in self.graph.neighbors_directed(idx, Direction::Outgoing) {
299                let dep = &self.graph[neighbor];
300                out.push_str(&format!("  -> {}\n", dep.name));
301            }
302        }
303        out
304    }
305
306    /// Dependency closure as JSON string.
307    pub fn dependency_closure_json(&self, project: &str) -> Result<String, KdoError> {
308        let deps = self.dependency_closure(project)?;
309        let names: Vec<&str> = deps.iter().map(|p| p.name.as_str()).collect();
310        serde_json::to_string_pretty(&names).map_err(|e| KdoError::ParseError {
311            path: std::path::PathBuf::from("<json>"),
312            source: e.into(),
313        })
314    }
315
316    /// Affected set as JSON string.
317    pub fn affected_set_json(&self, project: &str) -> Result<String, KdoError> {
318        let affected = self.affected_set(project)?;
319        let names: Vec<&str> = affected.iter().map(|p| p.name.as_str()).collect();
320        serde_json::to_string_pretty(&names).map_err(|e| KdoError::ParseError {
321            path: std::path::PathBuf::from("<json>"),
322            source: e.into(),
323        })
324    }
325
326    /// Return all projects in topological order (dependencies before dependents).
327    ///
328    /// Falls back to insertion order if the graph has cycles (which `detect_cycles` should
329    /// have caught earlier).
330    pub fn topological_order(&self) -> Vec<&Project> {
331        match petgraph::algo::toposort(&self.graph, None) {
332            Ok(indices) => indices.iter().map(|idx| &self.graph[*idx]).collect(),
333            Err(_) => self.projects(),
334        }
335    }
336
337    /// Find projects affected by changes since a git ref.
338    ///
339    /// Shells out to `git diff --name-only` to find changed files,
340    /// then maps them to owning projects.
341    pub fn affected_since_ref(&self, base_ref: &str) -> Result<Vec<String>, KdoError> {
342        let output = std::process::Command::new("git")
343            .args(["diff", "--name-only", &format!("{base_ref}...HEAD")])
344            .current_dir(&self.root)
345            .output()?;
346
347        if !output.status.success() {
348            // Fallback: try without ...HEAD (for uncommitted changes)
349            let output = std::process::Command::new("git")
350                .args(["diff", "--name-only", base_ref])
351                .current_dir(&self.root)
352                .output()?;
353
354            if !output.status.success() {
355                return Ok(Vec::new());
356            }
357            return self.map_paths_to_projects(&String::from_utf8_lossy(&output.stdout));
358        }
359
360        self.map_paths_to_projects(&String::from_utf8_lossy(&output.stdout))
361    }
362
363    /// Map changed file paths to their owning project names.
364    fn map_paths_to_projects(&self, diff_output: &str) -> Result<Vec<String>, KdoError> {
365        let mut affected = std::collections::HashSet::new();
366        for line in diff_output.lines() {
367            let changed_path = self.root.join(line.trim());
368            for project in self.projects() {
369                if changed_path.starts_with(&project.path) {
370                    affected.insert(project.name.clone());
371                }
372            }
373        }
374        let mut result: Vec<String> = affected.into_iter().collect();
375        result.sort();
376        Ok(result)
377    }
378}
379
380/// Apply `workspace.projects` and `workspace.exclude` glob filters from `kdo.toml`.
381/// If no `kdo.toml` is present or no globs are set, returns the input unchanged.
382fn apply_workspace_globs(root: &Path, paths: Vec<PathBuf>) -> Vec<PathBuf> {
383    use globset::{Glob, GlobSetBuilder};
384
385    let kdo_toml = root.join("kdo.toml");
386    if !kdo_toml.exists() {
387        return paths;
388    }
389    let Ok(config) = kdo_core::WorkspaceConfig::load(&kdo_toml) else {
390        return paths;
391    };
392
393    let build_set = |patterns: &[String]| -> Option<globset::GlobSet> {
394        if patterns.is_empty() {
395            return None;
396        }
397        let mut builder = GlobSetBuilder::new();
398        for p in patterns {
399            if let Ok(g) = Glob::new(p) {
400                builder.add(g);
401            }
402        }
403        builder.build().ok()
404    };
405
406    let include = build_set(&config.workspace.project_globs);
407    let exclude = build_set(&config.workspace.exclude);
408
409    paths
410        .into_iter()
411        .filter(|path| {
412            let Ok(rel) = path.strip_prefix(root) else {
413                return true;
414            };
415            let parent_rel = rel.parent().unwrap_or(Path::new(""));
416
417            if let Some(ex) = &exclude {
418                if ex.is_match(parent_rel) {
419                    return false;
420                }
421            }
422            if let Some(inc) = &include {
423                return inc.is_match(parent_rel);
424            }
425            true
426        })
427        .collect()
428}
429
430/// Filter manifests to prefer more specific ones (Anchor.toml > Cargo.toml in same dir).
431fn filter_manifests(paths: &[PathBuf]) -> Vec<PathBuf> {
432    let mut by_dir: IndexMap<PathBuf, Vec<PathBuf>> = IndexMap::new();
433    for path in paths {
434        let dir = path.parent().unwrap_or(Path::new(".")).to_path_buf();
435        by_dir.entry(dir).or_default().push(path.clone());
436    }
437
438    let mut filtered = Vec::new();
439    for (_dir, manifests) in &by_dir {
440        // Priority: Anchor.toml > Cargo.toml > package.json > pyproject.toml
441        let has_anchor = manifests
442            .iter()
443            .any(|p| p.file_name().map(|f| f == "Anchor.toml").unwrap_or(false));
444        for manifest in manifests {
445            let filename = manifest
446                .file_name()
447                .map(|f| f.to_string_lossy().to_string())
448                .unwrap_or_default();
449            // If Anchor.toml exists, skip Cargo.toml in the same dir
450            if has_anchor && filename == "Cargo.toml" {
451                continue;
452            }
453            filtered.push(manifest.clone());
454        }
455    }
456
457    filtered
458}
459
460/// Find a cycle and return a human-readable description.
461fn find_cycle_description(graph: &DiGraph<Project, DepKind>) -> String {
462    // Use petgraph's toposort which returns the node involved in a cycle on error
463    match petgraph::algo::toposort(graph, None) {
464        Ok(_) => "unknown cycle".to_string(),
465        Err(cycle) => {
466            let node = &graph[cycle.node_id()];
467            format!("cycle involving '{}'", node.name)
468        }
469    }
470}
471
472#[cfg(test)]
473mod tests {
474    use super::*;
475
476    #[test]
477    fn test_filter_manifests_anchor_priority() {
478        let paths = vec![
479            PathBuf::from("/project/Anchor.toml"),
480            PathBuf::from("/project/Cargo.toml"),
481            PathBuf::from("/project/sub/Cargo.toml"),
482        ];
483        let filtered = filter_manifests(&paths);
484        assert!(filtered.contains(&PathBuf::from("/project/Anchor.toml")));
485        assert!(!filtered.contains(&PathBuf::from("/project/Cargo.toml")));
486        assert!(filtered.contains(&PathBuf::from("/project/sub/Cargo.toml")));
487    }
488}