Skip to main content

kdo_graph/
discover.rs

1//! Workspace discovery and graph construction.
2
3use crate::hash::content_hash_dir;
4use indexmap::IndexMap;
5use kdo_core::{DepKind, Dependency, KdoError, Project};
6use kdo_resolver::{manifest_filenames, parse_manifest};
7use petgraph::algo::is_cyclic_directed;
8use petgraph::graph::{DiGraph, NodeIndex};
9use petgraph::visit::Bfs;
10use petgraph::Direction;
11use rayon::prelude::*;
12use serde::{Deserialize, Serialize};
13use std::path::{Path, PathBuf};
14use tracing::{debug, info, warn};
15
16/// The workspace dependency graph.
17///
18/// Wraps a `petgraph::DiGraph` where nodes are [`Project`]s and edges are [`DepKind`]s.
19#[derive(Debug)]
20pub struct WorkspaceGraph {
21    /// Root directory of the workspace.
22    pub root: PathBuf,
23    /// Directed graph of projects.
24    pub graph: DiGraph<Project, DepKind>,
25    /// Map from project name to node index.
26    name_to_idx: IndexMap<String, NodeIndex>,
27}
28
29/// Serializable project summary for JSON output.
30#[derive(Debug, Serialize, Deserialize)]
31pub struct ProjectSummary {
32    /// Project name.
33    pub name: String,
34    /// Detected language.
35    pub language: String,
36    /// Context summary if available.
37    pub summary: Option<String>,
38    /// Number of direct dependencies.
39    pub dep_count: usize,
40}
41
42/// Serializable graph output.
43#[derive(Debug, Serialize, Deserialize)]
44pub struct GraphOutput {
45    /// All projects in the workspace.
46    pub projects: Vec<ProjectSummary>,
47    /// Edges as (source_name, target_name, kind).
48    pub edges: Vec<(String, String, String)>,
49}
50
51impl WorkspaceGraph {
52    /// Discover all projects under `root` by walking the filesystem.
53    ///
54    /// Uses the `ignore` crate to respect `.gitignore` and `.kdoignore`.
55    /// Parses manifests in parallel with rayon.
56    pub fn discover(root: &Path) -> Result<Self, KdoError> {
57        let root = root
58            .canonicalize()
59            .map_err(|_| KdoError::ManifestNotFound(root.to_path_buf()))?;
60
61        info!(root = %root.display(), "discovering workspace");
62
63        let manifest_names = manifest_filenames();
64        let walker = ignore::WalkBuilder::new(&root)
65            .hidden(true)
66            .git_ignore(true)
67            .add_custom_ignore_filename(".kdoignore")
68            .filter_entry(|entry| {
69                let name = entry.file_name().to_string_lossy();
70                // Skip node_modules, target, .git, etc.
71                !matches!(
72                    name.as_ref(),
73                    "node_modules" | "target" | ".git" | ".kdo" | "dist" | "build" | "__pycache__"
74                )
75            })
76            .build();
77
78        // Collect manifest paths
79        let manifest_paths: Vec<PathBuf> = walker
80            .filter_map(|entry| entry.ok())
81            .filter(|entry| {
82                entry.file_type().map(|ft| ft.is_file()).unwrap_or(false)
83                    && entry
84                        .file_name()
85                        .to_str()
86                        .map(|name| manifest_names.contains(&name))
87                        .unwrap_or(false)
88            })
89            .map(|entry| entry.into_path())
90            .collect();
91
92        // Apply kdo.toml workspace.projects / workspace.exclude globs if present.
93        let manifest_paths = apply_workspace_globs(&root, manifest_paths);
94
95        debug!(count = manifest_paths.len(), "found manifest files");
96
97        // Filter out manifests in directories that have a more specific manifest.
98        // E.g., if a dir has both Anchor.toml and Cargo.toml, Anchor takes priority.
99        let filtered = filter_manifests(&manifest_paths);
100
101        // Parse manifests in parallel
102        let results: Vec<Result<(Project, Vec<Dependency>), KdoError>> = filtered
103            .par_iter()
104            .map(|path| parse_manifest(path, &root))
105            .collect();
106
107        let mut graph = DiGraph::new();
108        let mut name_to_idx = IndexMap::new();
109        let mut all_deps: Vec<(String, Vec<Dependency>)> = Vec::new();
110
111        for result in results {
112            match result {
113                Ok((mut project, deps)) => {
114                    // Compute content hash
115                    project.content_hash = content_hash_dir(&project.path);
116
117                    let name = project.name.clone();
118                    let idx = graph.add_node(project);
119                    name_to_idx.insert(name.clone(), idx);
120                    all_deps.push((name, deps));
121                }
122                Err(KdoError::ManifestNotFound(_)) => {
123                    // Skip workspace-root manifests
124                    continue;
125                }
126                Err(e) => {
127                    warn!(error = %e, "skipping unparseable manifest");
128                }
129            }
130        }
131
132        // Wire up edges for local/workspace dependencies
133        for (source_name, deps) in &all_deps {
134            if let Some(&source_idx) = name_to_idx.get(source_name) {
135                for dep in deps {
136                    if let Some(&target_idx) = name_to_idx.get(&dep.name) {
137                        graph.add_edge(source_idx, target_idx, dep.kind.clone());
138                    }
139                }
140            }
141        }
142
143        info!(
144            projects = name_to_idx.len(),
145            edges = graph.edge_count(),
146            "workspace graph built"
147        );
148
149        Ok(Self {
150            root,
151            graph,
152            name_to_idx,
153        })
154    }
155
156    /// Get a project by name.
157    pub fn get_project(&self, name: &str) -> Result<&Project, KdoError> {
158        let idx = self
159            .name_to_idx
160            .get(name)
161            .ok_or_else(|| KdoError::ProjectNotFound(name.to_string()))?;
162        Ok(&self.graph[*idx])
163    }
164
165    /// Get all projects.
166    pub fn projects(&self) -> Vec<&Project> {
167        self.graph.node_weights().collect()
168    }
169
170    /// Dependency closure: all transitive dependencies of a project (DFS on outgoing edges).
171    pub fn dependency_closure(&self, name: &str) -> Result<Vec<&Project>, KdoError> {
172        let start = self
173            .name_to_idx
174            .get(name)
175            .ok_or_else(|| KdoError::ProjectNotFound(name.to_string()))?;
176
177        let mut visited = Vec::new();
178        let mut stack = vec![*start];
179        let mut seen = std::collections::HashSet::new();
180        seen.insert(*start);
181
182        while let Some(node) = stack.pop() {
183            // Skip the start node itself
184            if node != *start {
185                visited.push(&self.graph[node]);
186            }
187            for neighbor in self.graph.neighbors_directed(node, Direction::Outgoing) {
188                if seen.insert(neighbor) {
189                    stack.push(neighbor);
190                }
191            }
192        }
193
194        Ok(visited)
195    }
196
197    /// Affected set: all projects that transitively depend on the given project.
198    /// Uses BFS on the reversed graph (incoming edges).
199    pub fn affected_set(&self, name: &str) -> Result<Vec<&Project>, KdoError> {
200        let start = self
201            .name_to_idx
202            .get(name)
203            .ok_or_else(|| KdoError::ProjectNotFound(name.to_string()))?;
204
205        // BFS on reversed edges
206        let reversed = petgraph::visit::Reversed(&self.graph);
207        let mut bfs = Bfs::new(&reversed, *start);
208        let mut affected = Vec::new();
209
210        while let Some(node) = bfs.next(&reversed) {
211            if node != *start {
212                affected.push(&self.graph[node]);
213            }
214        }
215
216        Ok(affected)
217    }
218
219    /// Detect cycles in the dependency graph.
220    pub fn detect_cycles(&self) -> Result<(), KdoError> {
221        if is_cyclic_directed(&self.graph) {
222            // Find a cycle for the error message
223            let cycle_desc = find_cycle_description(&self.graph);
224            return Err(KdoError::CircularDependency(cycle_desc));
225        }
226        Ok(())
227    }
228
229    /// Get a serializable summary of all projects.
230    pub fn project_summaries(&self) -> Vec<ProjectSummary> {
231        self.graph
232            .node_indices()
233            .map(|idx| {
234                let project = &self.graph[idx];
235                let dep_count = self
236                    .graph
237                    .neighbors_directed(idx, Direction::Outgoing)
238                    .count();
239                ProjectSummary {
240                    name: project.name.clone(),
241                    language: project.language.to_string(),
242                    summary: project.context_summary.clone(),
243                    dep_count,
244                }
245            })
246            .collect()
247    }
248
249    /// Get the full graph as a serializable structure.
250    pub fn to_graph_output(&self) -> GraphOutput {
251        let projects = self.project_summaries();
252        let edges = self
253            .graph
254            .edge_indices()
255            .filter_map(|edge_idx| {
256                let (source, target) = self.graph.edge_endpoints(edge_idx)?;
257                let kind = &self.graph[edge_idx];
258                Some((
259                    self.graph[source].name.clone(),
260                    self.graph[target].name.clone(),
261                    kind.to_string(),
262                ))
263            })
264            .collect();
265
266        GraphOutput { projects, edges }
267    }
268
269    /// Generate a DOT representation of the graph.
270    pub fn to_dot(&self) -> String {
271        let mut dot = String::from("digraph workspace {\n  rankdir=LR;\n");
272        for idx in self.graph.node_indices() {
273            let project = &self.graph[idx];
274            dot.push_str(&format!(
275                "  \"{}\" [label=\"{}\\n({})\"];\n",
276                project.name, project.name, project.language
277            ));
278        }
279        for edge_idx in self.graph.edge_indices() {
280            if let Some((source, target)) = self.graph.edge_endpoints(edge_idx) {
281                let kind = &self.graph[edge_idx];
282                dot.push_str(&format!(
283                    "  \"{}\" -> \"{}\" [label=\"{}\"];\n",
284                    self.graph[source].name, self.graph[target].name, kind
285                ));
286            }
287        }
288        dot.push_str("}\n");
289        dot
290    }
291
292    /// Generate a text representation of the graph.
293    pub fn to_text(&self) -> String {
294        let mut out = String::new();
295        for idx in self.graph.node_indices() {
296            let project = &self.graph[idx];
297            out.push_str(&format!("{} ({})\n", project.name, project.language));
298            for neighbor in self.graph.neighbors_directed(idx, Direction::Outgoing) {
299                let dep = &self.graph[neighbor];
300                out.push_str(&format!("  -> {}\n", dep.name));
301            }
302        }
303        out
304    }
305
306    /// Dependency closure as JSON string.
307    pub fn dependency_closure_json(&self, project: &str) -> Result<String, KdoError> {
308        let deps = self.dependency_closure(project)?;
309        let names: Vec<&str> = deps.iter().map(|p| p.name.as_str()).collect();
310        serde_json::to_string_pretty(&names).map_err(|e| KdoError::ParseError {
311            path: std::path::PathBuf::from("<json>"),
312            source: e.into(),
313        })
314    }
315
316    /// Affected set as JSON string.
317    pub fn affected_set_json(&self, project: &str) -> Result<String, KdoError> {
318        let affected = self.affected_set(project)?;
319        let names: Vec<&str> = affected.iter().map(|p| p.name.as_str()).collect();
320        serde_json::to_string_pretty(&names).map_err(|e| KdoError::ParseError {
321            path: std::path::PathBuf::from("<json>"),
322            source: e.into(),
323        })
324    }
325
326    /// Return all projects in topological order (dependencies before dependents).
327    ///
328    /// Falls back to insertion order if the graph has cycles (which `detect_cycles` should
329    /// have caught earlier).
330    pub fn topological_order(&self) -> Vec<&Project> {
331        match petgraph::algo::toposort(&self.graph, None) {
332            Ok(indices) => indices.iter().map(|idx| &self.graph[*idx]).collect(),
333            Err(_) => self.projects(),
334        }
335    }
336
337    /// Find projects affected by changes since a git ref.
338    ///
339    /// Shells out to `git diff --name-only` to find changed files,
340    /// then maps them to owning projects.
341    pub fn affected_since_ref(&self, base_ref: &str) -> Result<Vec<String>, KdoError> {
342        let output = std::process::Command::new("git")
343            .args(["diff", "--name-only", &format!("{base_ref}...HEAD")])
344            .current_dir(&self.root)
345            .output()?;
346
347        if !output.status.success() {
348            // Fallback: try without ...HEAD (for uncommitted changes)
349            let output = std::process::Command::new("git")
350                .args(["diff", "--name-only", base_ref])
351                .current_dir(&self.root)
352                .output()?;
353
354            if !output.status.success() {
355                return Ok(Vec::new());
356            }
357            return self.map_paths_to_projects(&String::from_utf8_lossy(&output.stdout));
358        }
359
360        self.map_paths_to_projects(&String::from_utf8_lossy(&output.stdout))
361    }
362
363    /// Map changed file paths to their owning project names.
364    fn map_paths_to_projects(&self, diff_output: &str) -> Result<Vec<String>, KdoError> {
365        let mut affected = std::collections::HashSet::new();
366        for line in diff_output.lines() {
367            let changed_path = self.root.join(line.trim());
368            for project in self.projects() {
369                if changed_path.starts_with(&project.path) {
370                    affected.insert(project.name.clone());
371                }
372            }
373        }
374        let mut result: Vec<String> = affected.into_iter().collect();
375        result.sort();
376        Ok(result)
377    }
378}
379
380/// Apply glob filters from both `kdo.toml` (workspace.projects/exclude) and
381/// `pnpm-workspace.yaml` (packages list). If neither file is present, the
382/// input is returned unchanged.
383fn apply_workspace_globs(root: &Path, paths: Vec<PathBuf>) -> Vec<PathBuf> {
384    use globset::{Glob, GlobSetBuilder};
385
386    let mut include_patterns: Vec<String> = Vec::new();
387    let mut exclude_patterns: Vec<String> = Vec::new();
388
389    let kdo_toml = root.join("kdo.toml");
390    if kdo_toml.exists() {
391        if let Ok(config) = kdo_core::WorkspaceConfig::load(&kdo_toml) {
392            include_patterns.extend(config.workspace.project_globs);
393            exclude_patterns.extend(config.workspace.exclude);
394        }
395    }
396
397    // pnpm-workspace.yaml — honor `packages:` entries, treating `!` prefix as exclude.
398    if let Some((inc, exc)) = parse_pnpm_workspace(&root.join("pnpm-workspace.yaml")) {
399        include_patterns.extend(inc);
400        exclude_patterns.extend(exc);
401    }
402
403    if include_patterns.is_empty() && exclude_patterns.is_empty() {
404        return paths;
405    }
406
407    // Build a GlobSet that treats `/` as a literal separator — matching pnpm/turbo
408    // semantics. Without this, `packages/*` would match `packages/a/b/c`, which is
409    // surprising and incorrect.
410    let build_set = |patterns: &[String]| -> Option<globset::GlobSet> {
411        use globset::GlobBuilder;
412        if patterns.is_empty() {
413            return None;
414        }
415        let mut builder = GlobSetBuilder::new();
416        for p in patterns {
417            if let Ok(g) = GlobBuilder::new(p).literal_separator(true).build() {
418                builder.add(g);
419            } else if let Ok(g) = Glob::new(p) {
420                // Fall back to default builder if the strict form fails to parse.
421                builder.add(g);
422            }
423        }
424        builder.build().ok()
425    };
426
427    let include = build_set(&include_patterns);
428    let exclude = build_set(&exclude_patterns);
429
430    paths
431        .into_iter()
432        .filter(|path| {
433            let Ok(rel) = path.strip_prefix(root) else {
434                return true;
435            };
436            let parent_rel = rel.parent().unwrap_or(Path::new(""));
437
438            if let Some(ex) = &exclude {
439                if ex.is_match(parent_rel) {
440                    return false;
441                }
442            }
443            if let Some(inc) = &include {
444                return inc.is_match(parent_rel);
445            }
446            true
447        })
448        .collect()
449}
450
451/// Parse a `pnpm-workspace.yaml` file and return `(includes, excludes)`.
452/// Patterns prefixed with `!` are excludes. We avoid pulling in a YAML crate by
453/// relying on pnpm's simple documented schema:
454///
455/// ```yaml
456/// packages:
457///   - "apps/*"
458///   - "!**/test/**"
459/// ```
460pub fn parse_pnpm_workspace(path: &Path) -> Option<(Vec<String>, Vec<String>)> {
461    let content = std::fs::read_to_string(path).ok()?;
462    parse_pnpm_workspace_str(&content)
463}
464
465/// Version of [`parse_pnpm_workspace`] that takes the file contents directly.
466/// Exposed for tests.
467pub fn parse_pnpm_workspace_str(content: &str) -> Option<(Vec<String>, Vec<String>)> {
468    let mut in_packages = false;
469    let mut includes = Vec::new();
470    let mut excludes = Vec::new();
471
472    for raw in content.lines() {
473        // Strip comments
474        let line = match raw.find('#') {
475            Some(i) => &raw[..i],
476            None => raw,
477        };
478
479        let trimmed = line.trim_end();
480        if trimmed.is_empty() {
481            continue;
482        }
483
484        if !trimmed.starts_with([' ', '\t', '-']) {
485            // Top-level key: enter or leave the `packages:` block
486            in_packages = trimmed.trim_end_matches(':').trim() == "packages";
487            continue;
488        }
489
490        if !in_packages {
491            continue;
492        }
493
494        // Accept forms:   `- "apps/*"`   `- 'apps/*'`   `- apps/*`
495        let item = trimmed.trim();
496        let Some(rest) = item.strip_prefix('-') else {
497            continue;
498        };
499        let pattern = rest
500            .trim()
501            .trim_matches(|c: char| c == '"' || c == '\'')
502            .trim();
503        if pattern.is_empty() {
504            continue;
505        }
506
507        if let Some(pat) = pattern.strip_prefix('!') {
508            excludes.push(pat.to_string());
509        } else {
510            includes.push(pattern.to_string());
511        }
512    }
513
514    if includes.is_empty() && excludes.is_empty() {
515        None
516    } else {
517        Some((includes, excludes))
518    }
519}
520
521/// Filter manifests to prefer more specific ones (Anchor.toml > Cargo.toml in same dir).
522fn filter_manifests(paths: &[PathBuf]) -> Vec<PathBuf> {
523    let mut by_dir: IndexMap<PathBuf, Vec<PathBuf>> = IndexMap::new();
524    for path in paths {
525        let dir = path.parent().unwrap_or(Path::new(".")).to_path_buf();
526        by_dir.entry(dir).or_default().push(path.clone());
527    }
528
529    let mut filtered = Vec::new();
530    for (_dir, manifests) in &by_dir {
531        // Priority: Anchor.toml > Cargo.toml > package.json > pyproject.toml
532        let has_anchor = manifests
533            .iter()
534            .any(|p| p.file_name().map(|f| f == "Anchor.toml").unwrap_or(false));
535        for manifest in manifests {
536            let filename = manifest
537                .file_name()
538                .map(|f| f.to_string_lossy().to_string())
539                .unwrap_or_default();
540            // If Anchor.toml exists, skip Cargo.toml in the same dir
541            if has_anchor && filename == "Cargo.toml" {
542                continue;
543            }
544            filtered.push(manifest.clone());
545        }
546    }
547
548    filtered
549}
550
551/// Find a cycle and return a human-readable description.
552fn find_cycle_description(graph: &DiGraph<Project, DepKind>) -> String {
553    // Use petgraph's toposort which returns the node involved in a cycle on error
554    match petgraph::algo::toposort(graph, None) {
555        Ok(_) => "unknown cycle".to_string(),
556        Err(cycle) => {
557            let node = &graph[cycle.node_id()];
558            format!("cycle involving '{}'", node.name)
559        }
560    }
561}
562
563#[cfg(test)]
564mod tests {
565    use super::*;
566
567    #[test]
568    fn test_filter_manifests_anchor_priority() {
569        let paths = vec![
570            PathBuf::from("/project/Anchor.toml"),
571            PathBuf::from("/project/Cargo.toml"),
572            PathBuf::from("/project/sub/Cargo.toml"),
573        ];
574        let filtered = filter_manifests(&paths);
575        assert!(filtered.contains(&PathBuf::from("/project/Anchor.toml")));
576        assert!(!filtered.contains(&PathBuf::from("/project/Cargo.toml")));
577        assert!(filtered.contains(&PathBuf::from("/project/sub/Cargo.toml")));
578    }
579}