Skip to main content

kdo_graph/
discover.rs

1//! Workspace discovery and graph construction.
2
3use crate::hash::content_hash_dir;
4use indexmap::IndexMap;
5use kdo_core::{DepKind, Dependency, KdoError, Project};
6use kdo_resolver::{manifest_filenames, parse_manifest};
7use petgraph::algo::is_cyclic_directed;
8use petgraph::graph::{DiGraph, NodeIndex};
9use petgraph::visit::Bfs;
10use petgraph::Direction;
11use rayon::prelude::*;
12use serde::{Deserialize, Serialize};
13use std::path::{Path, PathBuf};
14use tracing::{debug, info, warn};
15
16/// The workspace dependency graph.
17///
18/// Wraps a `petgraph::DiGraph` where nodes are [`Project`]s and edges are [`DepKind`]s.
19#[derive(Debug)]
20pub struct WorkspaceGraph {
21    /// Root directory of the workspace.
22    pub root: PathBuf,
23    /// Directed graph of projects.
24    pub graph: DiGraph<Project, DepKind>,
25    /// Map from project name to node index.
26    name_to_idx: IndexMap<String, NodeIndex>,
27}
28
29/// Serializable project summary for JSON output.
30#[derive(Debug, Serialize, Deserialize)]
31pub struct ProjectSummary {
32    /// Project name.
33    pub name: String,
34    /// Detected language.
35    pub language: String,
36    /// Context summary if available.
37    pub summary: Option<String>,
38    /// Number of direct dependencies.
39    pub dep_count: usize,
40}
41
42/// Serializable graph output.
43#[derive(Debug, Serialize, Deserialize)]
44pub struct GraphOutput {
45    /// All projects in the workspace.
46    pub projects: Vec<ProjectSummary>,
47    /// Edges as (source_name, target_name, kind).
48    pub edges: Vec<(String, String, String)>,
49}
50
51impl WorkspaceGraph {
52    /// Discover all projects under `root` by walking the filesystem.
53    ///
54    /// Uses the `ignore` crate to respect `.gitignore` and `.kdoignore`.
55    /// Parses manifests in parallel with rayon.
56    pub fn discover(root: &Path) -> Result<Self, KdoError> {
57        let root = root
58            .canonicalize()
59            .map_err(|_| KdoError::ManifestNotFound(root.to_path_buf()))?;
60
61        info!(root = %root.display(), "discovering workspace");
62
63        let manifest_names = manifest_filenames();
64        let walker = ignore::WalkBuilder::new(&root)
65            .hidden(true)
66            .git_ignore(true)
67            .add_custom_ignore_filename(".kdoignore")
68            .filter_entry(|entry| {
69                let name = entry.file_name().to_string_lossy();
70                // Skip node_modules, target, .git, etc.
71                !matches!(
72                    name.as_ref(),
73                    "node_modules" | "target" | ".git" | ".kdo" | "dist" | "build" | "__pycache__"
74                )
75            })
76            .build();
77
78        // Collect manifest paths
79        let manifest_paths: Vec<PathBuf> = walker
80            .filter_map(|entry| entry.ok())
81            .filter(|entry| {
82                entry.file_type().map(|ft| ft.is_file()).unwrap_or(false)
83                    && entry
84                        .file_name()
85                        .to_str()
86                        .map(|name| manifest_names.contains(&name))
87                        .unwrap_or(false)
88            })
89            .map(|entry| entry.into_path())
90            .collect();
91
92        debug!(count = manifest_paths.len(), "found manifest files");
93
94        // Filter out manifests in directories that have a more specific manifest.
95        // E.g., if a dir has both Anchor.toml and Cargo.toml, Anchor takes priority.
96        let filtered = filter_manifests(&manifest_paths);
97
98        // Parse manifests in parallel
99        let results: Vec<Result<(Project, Vec<Dependency>), KdoError>> = filtered
100            .par_iter()
101            .map(|path| parse_manifest(path, &root))
102            .collect();
103
104        let mut graph = DiGraph::new();
105        let mut name_to_idx = IndexMap::new();
106        let mut all_deps: Vec<(String, Vec<Dependency>)> = Vec::new();
107
108        for result in results {
109            match result {
110                Ok((mut project, deps)) => {
111                    // Compute content hash
112                    project.content_hash = content_hash_dir(&project.path);
113
114                    let name = project.name.clone();
115                    let idx = graph.add_node(project);
116                    name_to_idx.insert(name.clone(), idx);
117                    all_deps.push((name, deps));
118                }
119                Err(KdoError::ManifestNotFound(_)) => {
120                    // Skip workspace-root manifests
121                    continue;
122                }
123                Err(e) => {
124                    warn!(error = %e, "skipping unparseable manifest");
125                }
126            }
127        }
128
129        // Wire up edges for local/workspace dependencies
130        for (source_name, deps) in &all_deps {
131            if let Some(&source_idx) = name_to_idx.get(source_name) {
132                for dep in deps {
133                    if let Some(&target_idx) = name_to_idx.get(&dep.name) {
134                        graph.add_edge(source_idx, target_idx, dep.kind.clone());
135                    }
136                }
137            }
138        }
139
140        info!(
141            projects = name_to_idx.len(),
142            edges = graph.edge_count(),
143            "workspace graph built"
144        );
145
146        Ok(Self {
147            root,
148            graph,
149            name_to_idx,
150        })
151    }
152
153    /// Get a project by name.
154    pub fn get_project(&self, name: &str) -> Result<&Project, KdoError> {
155        let idx = self
156            .name_to_idx
157            .get(name)
158            .ok_or_else(|| KdoError::ProjectNotFound(name.to_string()))?;
159        Ok(&self.graph[*idx])
160    }
161
162    /// Get all projects.
163    pub fn projects(&self) -> Vec<&Project> {
164        self.graph.node_weights().collect()
165    }
166
167    /// Dependency closure: all transitive dependencies of a project (DFS on outgoing edges).
168    pub fn dependency_closure(&self, name: &str) -> Result<Vec<&Project>, KdoError> {
169        let start = self
170            .name_to_idx
171            .get(name)
172            .ok_or_else(|| KdoError::ProjectNotFound(name.to_string()))?;
173
174        let mut visited = Vec::new();
175        let mut stack = vec![*start];
176        let mut seen = std::collections::HashSet::new();
177        seen.insert(*start);
178
179        while let Some(node) = stack.pop() {
180            // Skip the start node itself
181            if node != *start {
182                visited.push(&self.graph[node]);
183            }
184            for neighbor in self.graph.neighbors_directed(node, Direction::Outgoing) {
185                if seen.insert(neighbor) {
186                    stack.push(neighbor);
187                }
188            }
189        }
190
191        Ok(visited)
192    }
193
194    /// Affected set: all projects that transitively depend on the given project.
195    /// Uses BFS on the reversed graph (incoming edges).
196    pub fn affected_set(&self, name: &str) -> Result<Vec<&Project>, KdoError> {
197        let start = self
198            .name_to_idx
199            .get(name)
200            .ok_or_else(|| KdoError::ProjectNotFound(name.to_string()))?;
201
202        // BFS on reversed edges
203        let reversed = petgraph::visit::Reversed(&self.graph);
204        let mut bfs = Bfs::new(&reversed, *start);
205        let mut affected = Vec::new();
206
207        while let Some(node) = bfs.next(&reversed) {
208            if node != *start {
209                affected.push(&self.graph[node]);
210            }
211        }
212
213        Ok(affected)
214    }
215
216    /// Detect cycles in the dependency graph.
217    pub fn detect_cycles(&self) -> Result<(), KdoError> {
218        if is_cyclic_directed(&self.graph) {
219            // Find a cycle for the error message
220            let cycle_desc = find_cycle_description(&self.graph);
221            return Err(KdoError::CircularDependency(cycle_desc));
222        }
223        Ok(())
224    }
225
226    /// Get a serializable summary of all projects.
227    pub fn project_summaries(&self) -> Vec<ProjectSummary> {
228        self.graph
229            .node_indices()
230            .map(|idx| {
231                let project = &self.graph[idx];
232                let dep_count = self
233                    .graph
234                    .neighbors_directed(idx, Direction::Outgoing)
235                    .count();
236                ProjectSummary {
237                    name: project.name.clone(),
238                    language: project.language.to_string(),
239                    summary: project.context_summary.clone(),
240                    dep_count,
241                }
242            })
243            .collect()
244    }
245
246    /// Get the full graph as a serializable structure.
247    pub fn to_graph_output(&self) -> GraphOutput {
248        let projects = self.project_summaries();
249        let edges = self
250            .graph
251            .edge_indices()
252            .filter_map(|edge_idx| {
253                let (source, target) = self.graph.edge_endpoints(edge_idx)?;
254                let kind = &self.graph[edge_idx];
255                Some((
256                    self.graph[source].name.clone(),
257                    self.graph[target].name.clone(),
258                    kind.to_string(),
259                ))
260            })
261            .collect();
262
263        GraphOutput { projects, edges }
264    }
265
266    /// Generate a DOT representation of the graph.
267    pub fn to_dot(&self) -> String {
268        let mut dot = String::from("digraph workspace {\n  rankdir=LR;\n");
269        for idx in self.graph.node_indices() {
270            let project = &self.graph[idx];
271            dot.push_str(&format!(
272                "  \"{}\" [label=\"{}\\n({})\"];\n",
273                project.name, project.name, project.language
274            ));
275        }
276        for edge_idx in self.graph.edge_indices() {
277            if let Some((source, target)) = self.graph.edge_endpoints(edge_idx) {
278                let kind = &self.graph[edge_idx];
279                dot.push_str(&format!(
280                    "  \"{}\" -> \"{}\" [label=\"{}\"];\n",
281                    self.graph[source].name, self.graph[target].name, kind
282                ));
283            }
284        }
285        dot.push_str("}\n");
286        dot
287    }
288
289    /// Generate a text representation of the graph.
290    pub fn to_text(&self) -> String {
291        let mut out = String::new();
292        for idx in self.graph.node_indices() {
293            let project = &self.graph[idx];
294            out.push_str(&format!("{} ({})\n", project.name, project.language));
295            for neighbor in self.graph.neighbors_directed(idx, Direction::Outgoing) {
296                let dep = &self.graph[neighbor];
297                out.push_str(&format!("  -> {}\n", dep.name));
298            }
299        }
300        out
301    }
302
303    /// Dependency closure as JSON string.
304    pub fn dependency_closure_json(&self, project: &str) -> Result<String, KdoError> {
305        let deps = self.dependency_closure(project)?;
306        let names: Vec<&str> = deps.iter().map(|p| p.name.as_str()).collect();
307        serde_json::to_string_pretty(&names).map_err(|e| KdoError::ParseError {
308            path: std::path::PathBuf::from("<json>"),
309            source: e.into(),
310        })
311    }
312
313    /// Affected set as JSON string.
314    pub fn affected_set_json(&self, project: &str) -> Result<String, KdoError> {
315        let affected = self.affected_set(project)?;
316        let names: Vec<&str> = affected.iter().map(|p| p.name.as_str()).collect();
317        serde_json::to_string_pretty(&names).map_err(|e| KdoError::ParseError {
318            path: std::path::PathBuf::from("<json>"),
319            source: e.into(),
320        })
321    }
322
323    /// Return all projects in topological order (dependencies before dependents).
324    ///
325    /// Falls back to insertion order if the graph has cycles (which `detect_cycles` should
326    /// have caught earlier).
327    pub fn topological_order(&self) -> Vec<&Project> {
328        match petgraph::algo::toposort(&self.graph, None) {
329            Ok(indices) => indices.iter().map(|idx| &self.graph[*idx]).collect(),
330            Err(_) => self.projects(),
331        }
332    }
333
334    /// Find projects affected by changes since a git ref.
335    ///
336    /// Shells out to `git diff --name-only` to find changed files,
337    /// then maps them to owning projects.
338    pub fn affected_since_ref(&self, base_ref: &str) -> Result<Vec<String>, KdoError> {
339        let output = std::process::Command::new("git")
340            .args(["diff", "--name-only", &format!("{base_ref}...HEAD")])
341            .current_dir(&self.root)
342            .output()?;
343
344        if !output.status.success() {
345            // Fallback: try without ...HEAD (for uncommitted changes)
346            let output = std::process::Command::new("git")
347                .args(["diff", "--name-only", base_ref])
348                .current_dir(&self.root)
349                .output()?;
350
351            if !output.status.success() {
352                return Ok(Vec::new());
353            }
354            return self.map_paths_to_projects(&String::from_utf8_lossy(&output.stdout));
355        }
356
357        self.map_paths_to_projects(&String::from_utf8_lossy(&output.stdout))
358    }
359
360    /// Map changed file paths to their owning project names.
361    fn map_paths_to_projects(&self, diff_output: &str) -> Result<Vec<String>, KdoError> {
362        let mut affected = std::collections::HashSet::new();
363        for line in diff_output.lines() {
364            let changed_path = self.root.join(line.trim());
365            for project in self.projects() {
366                if changed_path.starts_with(&project.path) {
367                    affected.insert(project.name.clone());
368                }
369            }
370        }
371        let mut result: Vec<String> = affected.into_iter().collect();
372        result.sort();
373        Ok(result)
374    }
375}
376
377/// Filter manifests to prefer more specific ones (Anchor.toml > Cargo.toml in same dir).
378fn filter_manifests(paths: &[PathBuf]) -> Vec<PathBuf> {
379    let mut by_dir: IndexMap<PathBuf, Vec<PathBuf>> = IndexMap::new();
380    for path in paths {
381        let dir = path.parent().unwrap_or(Path::new(".")).to_path_buf();
382        by_dir.entry(dir).or_default().push(path.clone());
383    }
384
385    let mut filtered = Vec::new();
386    for (_dir, manifests) in &by_dir {
387        // Priority: Anchor.toml > Cargo.toml > package.json > pyproject.toml
388        let has_anchor = manifests
389            .iter()
390            .any(|p| p.file_name().map(|f| f == "Anchor.toml").unwrap_or(false));
391        for manifest in manifests {
392            let filename = manifest
393                .file_name()
394                .map(|f| f.to_string_lossy().to_string())
395                .unwrap_or_default();
396            // If Anchor.toml exists, skip Cargo.toml in the same dir
397            if has_anchor && filename == "Cargo.toml" {
398                continue;
399            }
400            filtered.push(manifest.clone());
401        }
402    }
403
404    filtered
405}
406
407/// Find a cycle and return a human-readable description.
408fn find_cycle_description(graph: &DiGraph<Project, DepKind>) -> String {
409    // Use petgraph's toposort which returns the node involved in a cycle on error
410    match petgraph::algo::toposort(graph, None) {
411        Ok(_) => "unknown cycle".to_string(),
412        Err(cycle) => {
413            let node = &graph[cycle.node_id()];
414            format!("cycle involving '{}'", node.name)
415        }
416    }
417}
418
419#[cfg(test)]
420mod tests {
421    use super::*;
422
423    #[test]
424    fn test_filter_manifests_anchor_priority() {
425        let paths = vec![
426            PathBuf::from("/project/Anchor.toml"),
427            PathBuf::from("/project/Cargo.toml"),
428            PathBuf::from("/project/sub/Cargo.toml"),
429        ];
430        let filtered = filter_manifests(&paths);
431        assert!(filtered.contains(&PathBuf::from("/project/Anchor.toml")));
432        assert!(!filtered.contains(&PathBuf::from("/project/Cargo.toml")));
433        assert!(filtered.contains(&PathBuf::from("/project/sub/Cargo.toml")));
434    }
435}