Skip to main content

cargo_rail/graph/
core.rs

1//! Workspace dependency graph built from cargo_metadata + petgraph
2//!
3//! # Design Philosophy
4//!
5//! We use `cargo_metadata` + `petgraph` directly instead of guppy because:
6//! - **No simulation needed**: We need workspace membership, dep edges, reverse deps,
7//!   and "what's affected" - all available in cargo_metadata already
8//! - **Your domain, your types**: Rail's concepts (WorkspaceGraph, affected analysis)
9//!   should be first-class, not wrappers around guppy
10//! - **Minimal cognitive tax**: Single engineer, opinionated tool - every abstraction
11//!   layer must earn its keep
12//!
13//! ## Graph Structure
14//!
15//! - **Directed Graph**: `A → B` means "A depends on B"
16//! - **Nodes**: Packages (workspace members + dependencies)
17//! - **Edges**: Dependency relationships (normal/dev/build)
18//! - **Index**: Fast lookups by crate name / package ID
19//! - **Algorithms**: Shortest paths, reachability, transitive closure
20//! - **Path cache**: File → owning crate mapping (lazy, interior mutability)
21
22use crate::error::{RailError, RailResult};
23use cargo_metadata::DependencyKind;
24use petgraph::Direction;
25use petgraph::algo::toposort;
26use petgraph::graph::{DiGraph, NodeIndex};
27use rustc_hash::FxHashMap;
28use std::collections::HashSet;
29use std::path::{Path, PathBuf};
30use std::sync::RwLock;
31
32/// A package node in the dependency graph.
33#[derive(Debug, Clone)]
34pub struct PackageNode {
35  /// Package name
36  pub name: String,
37  /// Path to Cargo.toml
38  pub manifest_path: PathBuf,
39  /// Whether this is a workspace member
40  pub is_workspace_member: bool,
41}
42
43/// Workspace dependency graph.
44///
45/// Built from cargo_metadata, using petgraph for efficient traversals.
46pub struct WorkspaceGraph {
47  /// The dependency graph (petgraph DiGraph)
48  /// Nodes: PackageNode
49  /// Edges: DependencyKind (Normal, Dev, Build)
50  graph: DiGraph<PackageNode, DependencyKind>,
51
52  /// Index: package name → node index (FxHashMap for faster String hashing)
53  name_to_node: FxHashMap<String, NodeIndex>,
54
55  /// Workspace members only (subset of graph nodes) - for O(1) membership checks
56  workspace_members: HashSet<String>,
57
58  /// Pre-sorted workspace member names - avoids repeated sort on each call
59  sorted_members: Vec<String>,
60
61  /// Workspace root directory (for converting absolute paths to relative)
62  workspace_root: PathBuf,
63
64  /// Path cache: workspace-relative directory → owning crate name
65  /// Built eagerly during graph construction (saves 10-50ms on first file lookup)
66  /// Uses RwLock instead of RefCell for Send/Sync compatibility
67  /// Stores workspace-relative paths to support deleted files (no canonicalize needed)
68  path_cache: RwLock<Option<FxHashMap<PathBuf, String>>>,
69}
70
71impl WorkspaceGraph {
72  /// Load workspace graph from cargo metadata.
73  ///
74  /// # Performance
75  /// - Graph construction: 10-50ms
76  /// - Path cache: built eagerly (10-50ms)
77  /// - **Does not reload metadata** (use existing from CargoState)
78  pub fn from_metadata(metadata: &cargo_metadata::Metadata) -> RailResult<Self> {
79    // Build petgraph
80    let mut graph = DiGraph::new();
81    let mut name_to_node = FxHashMap::default();
82    let mut id_to_node = FxHashMap::default();
83    let mut workspace_members = HashSet::new();
84
85    // Get workspace member IDs
86    let workspace_pkg_ids: HashSet<_> = metadata.workspace_packages().iter().map(|pkg| pkg.id.clone()).collect();
87
88    // Add all packages as nodes (workspace + dependencies)
89    for package in &metadata.packages {
90      let crate_name = package.name.as_ref().to_string();
91
92      let node = PackageNode {
93        name: crate_name.clone(),
94        manifest_path: package.manifest_path.clone().into_std_path_buf(),
95        is_workspace_member: workspace_pkg_ids.contains(&package.id),
96      };
97
98      let node_idx = graph.add_node(node);
99      name_to_node.insert(package.name.as_ref().to_string(), node_idx);
100      id_to_node.insert(package.id.clone(), node_idx);
101
102      if workspace_pkg_ids.contains(&package.id) {
103        workspace_members.insert(package.name.as_ref().to_string());
104      }
105    }
106
107    // Add dependency edges
108    for package in &metadata.packages {
109      let from_idx = id_to_node[&package.id];
110
111      for dep in &package.dependencies {
112        // Find the resolved dependency
113        if let Some(to_idx) = name_to_node.get(dep.name.as_str()) {
114          graph.add_edge(from_idx, *to_idx, dep.kind);
115        }
116      }
117    }
118
119    // Pre-sort workspace members once at construction
120    let mut sorted_members: Vec<String> = workspace_members.iter().cloned().collect();
121    sorted_members.sort();
122
123    // Store workspace root for path normalization
124    let workspace_root = metadata.workspace_root.clone().into_std_path_buf();
125
126    let graph = Self {
127      graph,
128      name_to_node,
129      workspace_members,
130      sorted_members,
131      workspace_root,
132      path_cache: RwLock::new(None),
133    };
134
135    // Build path cache eagerly instead of lazily (saves 10-50ms on first file lookup)
136    graph.build_path_cache();
137
138    Ok(graph)
139  }
140
141  /// Get all workspace member crate names (pre-sorted).
142  pub fn workspace_members(&self) -> &[String] {
143    &self.sorted_members
144  }
145
146  /// Get transitive reverse dependencies (all workspace crates that depend on this one).
147  ///
148  /// Uses petgraph DFS for efficient traversal.
149  ///
150  /// # Errors
151  ///
152  /// Returns [`RailError`] if `crate_name` is not found in the graph.
153  ///
154  /// # Performance
155  ///
156  /// O(V + E) where V = vertices, E = edges. Typically <10ms for <100 crates.
157  pub fn transitive_dependents(&self, crate_name: &str) -> RailResult<Vec<String>> {
158    let start_node = self.find_node(crate_name)?;
159
160    // DFS in reverse direction (incoming edges)
161    let mut visited = HashSet::new();
162    let mut stack = vec![start_node];
163    let mut dependents = HashSet::new();
164
165    while let Some(node_idx) = stack.pop() {
166      if !visited.insert(node_idx) {
167        continue;
168      }
169
170      // Add all incoming neighbors (things that depend on this)
171      for neighbor_idx in self.graph.neighbors_directed(node_idx, Direction::Incoming) {
172        let neighbor = &self.graph[neighbor_idx];
173
174        // Only include workspace members; skip clone if already in set
175        if neighbor.is_workspace_member && neighbor_idx != start_node && !dependents.contains(&neighbor.name) {
176          dependents.insert(neighbor.name.clone());
177        }
178
179        stack.push(neighbor_idx);
180      }
181    }
182
183    let mut result: Vec<_> = dependents.into_iter().collect();
184    result.sort();
185    Ok(result)
186  }
187
188  /// Get transitive reverse dependencies for multiple crates in a single traversal.
189  ///
190  /// This is more efficient than calling `transitive_dependents()` multiple times
191  /// when you have many direct crates, as it does a single O(V+E) traversal instead
192  /// of O(N × (V+E)) where N is the number of crates.
193  ///
194  /// # Performance
195  /// O(V + E) regardless of input set size. Significantly faster than N separate
196  /// traversals for large input sets.
197  pub fn transitive_dependents_of_set(&self, crate_names: &HashSet<String>) -> RailResult<HashSet<String>> {
198    if crate_names.is_empty() {
199      return Ok(HashSet::new());
200    }
201
202    // Find all start nodes
203    let start_nodes: Vec<NodeIndex> = crate_names
204      .iter()
205      .filter_map(|name| self.name_to_node.get(name).copied())
206      .collect();
207
208    if start_nodes.is_empty() {
209      return Ok(HashSet::new());
210    }
211
212    // Single BFS/DFS from all start nodes
213    let mut visited = HashSet::new();
214    let mut stack = start_nodes;
215    let mut dependents = HashSet::new();
216
217    // Pre-compute start node indices for fast lookup
218    let start_node_set: HashSet<NodeIndex> = stack.iter().copied().collect();
219
220    while let Some(node_idx) = stack.pop() {
221      if !visited.insert(node_idx) {
222        continue;
223      }
224
225      // Add all incoming neighbors (things that depend on this)
226      for neighbor_idx in self.graph.neighbors_directed(node_idx, Direction::Incoming) {
227        let neighbor = &self.graph[neighbor_idx];
228
229        // Only include workspace members not in original set; skip clone if already in set
230        if neighbor.is_workspace_member
231          && !start_node_set.contains(&neighbor_idx)
232          && !dependents.contains(&neighbor.name)
233        {
234          dependents.insert(neighbor.name.clone());
235        }
236
237        stack.push(neighbor_idx);
238      }
239    }
240
241    Ok(dependents)
242  }
243
244  /// Get workspace members in dependency order (dependencies first, dependents last).
245  ///
246  /// Returns crates in the order they should be published: a crate's dependencies
247  /// are always published before the crate itself.
248  ///
249  /// Uses topological sort on the dependency graph to ensure correct ordering.
250  ///
251  /// # Errors
252  /// Returns error if circular dependencies are detected (should never happen with Cargo).
253  ///
254  /// # Performance
255  /// O(V + E) where V = vertices, E = edges. Typically <10ms for <100 crates.
256  pub fn publish_order(&self) -> RailResult<Vec<String>> {
257    // Build a subgraph with only workspace members
258    // This is critical: external dependencies can have cycles (e.g., serde/serde_derive in dev deps),
259    // but workspace members should never have cycles (Cargo enforces this)
260    let mut subgraph = DiGraph::<&PackageNode, DependencyKind>::new();
261    let mut name_to_subgraph_idx = FxHashMap::default();
262
263    // Add only workspace member nodes
264    for (name, &idx) in &self.name_to_node {
265      let node = &self.graph[idx];
266      if node.is_workspace_member {
267        let subgraph_idx = subgraph.add_node(node);
268        name_to_subgraph_idx.insert(name.clone(), subgraph_idx);
269      }
270    }
271
272    // Add edges between workspace members only
273    // IMPORTANT: Skip dev-dependencies as they don't affect publish order
274    // (dev-deps can have cycles, including self-references for feature-gated test utils)
275    for (from_name, &from_subgraph_idx) in &name_to_subgraph_idx {
276      let from_graph_idx = self.name_to_node[from_name];
277
278      // Check all outgoing edges from this workspace member
279      for neighbor_graph_idx in self.graph.neighbors_directed(from_graph_idx, Direction::Outgoing) {
280        let neighbor_node = &self.graph[neighbor_graph_idx];
281
282        // Only add edge if the neighbor is also a workspace member
283        if neighbor_node.is_workspace_member
284          && let Some(&to_subgraph_idx) = name_to_subgraph_idx.get(&neighbor_node.name)
285          && let Some(edge) = self.graph.find_edge(from_graph_idx, neighbor_graph_idx)
286          && let Some(&kind) = self.graph.edge_weight(edge)
287        {
288          // Skip dev-dependencies - they don't affect publish order and can have cycles
289          // Also skip self-references (crate depending on itself for test features)
290          if kind != DependencyKind::Development && from_name != &neighbor_node.name {
291            subgraph.add_edge(from_subgraph_idx, to_subgraph_idx, kind);
292          }
293        }
294      }
295    }
296
297    // Now run toposort on the workspace-only subgraph
298    let sorted = toposort(&subgraph, None).map_err(|cycle| {
299      let node = subgraph[cycle.node_id()];
300      RailError::message(format!(
301        "Circular dependency detected involving workspace crate: '{}'. This should not happen in a valid Cargo workspace.",
302        node.name
303      ))
304    })?;
305
306    // Collect names in dependency order
307    let result: Vec<String> = sorted.into_iter().map(|idx| subgraph[idx].name.clone()).collect();
308
309    Ok(result)
310  }
311
312  /// Map a file path to its owning crate.
313  ///
314  /// O(1) lookups using pre-built path cache.
315  /// Filters out VCS directories (.git, .jj) and other non-source paths.
316  ///
317  /// Accepts both:
318  /// - Workspace-relative paths (e.g., "crates/foo/src/lib.rs")
319  /// - Absolute paths (will be converted to workspace-relative)
320  ///
321  /// Works for deleted files - does not require the file to exist on disk.
322  pub fn file_to_crate(&self, file_path: &Path) -> Option<String> {
323    // Filter out VCS directories (git, jj, etc.)
324    if Self::should_ignore_path(file_path) {
325      return None;
326    }
327
328    // Normalize to workspace-relative path
329    let relative_path = self.to_workspace_relative(file_path)?;
330
331    // If the lock is poisoned (another thread panicked), return None gracefully
332    let cache = self.path_cache.read().ok()?;
333    let cache_ref = cache.as_ref()?;
334
335    // Walk up directory tree looking for a crate root
336    let mut current = relative_path.as_path();
337
338    // Check the file's directory first
339    if let Some(parent) = current.parent() {
340      if let Some(crate_name) = cache_ref.get(parent) {
341        return Some(crate_name.clone());
342      }
343      current = parent;
344    }
345
346    // Walk up looking for parent directories
347    while let Some(parent) = current.parent() {
348      if let Some(crate_name) = cache_ref.get(parent) {
349        return Some(crate_name.clone());
350      }
351      current = parent;
352    }
353
354    // Check root directory (for root-level crate)
355    cache_ref.get(Path::new("")).cloned()
356  }
357
358  /// Convert a path to workspace-relative.
359  ///
360  /// Handles:
361  /// - Already relative paths: returned as-is
362  /// - Absolute paths: strips workspace root prefix
363  /// - Paths that don't exist: works without filesystem access
364  fn to_workspace_relative(&self, path: &Path) -> Option<PathBuf> {
365    if path.is_absolute() {
366      // Try to strip workspace root prefix
367      path.strip_prefix(&self.workspace_root).ok().map(PathBuf::from)
368    } else {
369      // Already relative - assume it's workspace-relative
370      Some(path.to_path_buf())
371    }
372  }
373
374  /// Map multiple files to owning crates.
375  pub fn files_to_crates(&self, file_paths: &[impl AsRef<Path>]) -> HashSet<String> {
376    file_paths
377      .iter()
378      .filter_map(|p| self.file_to_crate(p.as_ref()))
379      .collect()
380  }
381
382  /// Find node index by crate name.
383  fn find_node(&self, crate_name: &str) -> RailResult<NodeIndex> {
384    self.name_to_node.get(crate_name).copied().ok_or_else(|| {
385      RailError::message(format!(
386        "Crate '{}' not found. Available workspace crates: {}",
387        crate_name,
388        self.workspace_members().join(", ")
389      ))
390    })
391  }
392
393  /// Check if a path should be ignored (VCS directories, build artifacts, etc.)
394  fn should_ignore_path(path: &Path) -> bool {
395    path.components().any(|component| {
396      if let std::path::Component::Normal(name) = component {
397        let name_str = name.to_string_lossy();
398        // Ignore VCS directories
399        matches!(
400          name_str.as_ref(),
401          ".git" | ".jj" | ".hg" | ".svn" |
402          // Ignore build/target directories
403          "target" | "node_modules" |
404          // Ignore common temp/cache dirs
405          ".cache" | "tmp" | ".tmp"
406        )
407      } else {
408        false
409      }
410    })
411  }
412
413  /// Build path-to-crate mapping cache.
414  ///
415  /// Maps each workspace crate's root directory (workspace-relative) to its name.
416  /// Uses workspace-relative paths to support deleted files (no canonicalize needed).
417  fn build_path_cache(&self) {
418    let mut cache = FxHashMap::default();
419
420    for crate_name in &self.workspace_members {
421      if let Some(node_idx) = self.name_to_node.get(crate_name) {
422        let node = &self.graph[*node_idx];
423
424        // Get crate root (parent of Cargo.toml) as workspace-relative path
425        if let Some(crate_root) = node.manifest_path.parent() {
426          // Convert to workspace-relative path
427          let relative_root = if crate_root.is_absolute() {
428            crate_root
429              .strip_prefix(&self.workspace_root)
430              .map(PathBuf::from)
431              .unwrap_or_else(|_| crate_root.to_path_buf())
432          } else {
433            crate_root.to_path_buf()
434          };
435
436          cache.insert(relative_root, crate_name.clone());
437        }
438      }
439    }
440
441    // If the lock is poisoned (another thread panicked), skip cache update
442    // The cache will be rebuilt on next access attempt
443    if let Ok(mut guard) = self.path_cache.write() {
444      *guard = Some(cache);
445    }
446  }
447}
448
449#[cfg(test)]
450mod tests {
451  use super::*;
452
453  #[test]
454  fn test_should_ignore_path() {
455    // VCS directories
456    assert!(
457      WorkspaceGraph::should_ignore_path(Path::new(".git")),
458      ".git should be ignored"
459    );
460    assert!(
461      WorkspaceGraph::should_ignore_path(Path::new(".git/objects")),
462      ".git/objects should be ignored"
463    );
464    assert!(
465      WorkspaceGraph::should_ignore_path(Path::new("foo/.git/bar")),
466      "nested .git should be ignored"
467    );
468
469    assert!(
470      WorkspaceGraph::should_ignore_path(Path::new(".jj")),
471      ".jj should be ignored"
472    );
473    assert!(
474      WorkspaceGraph::should_ignore_path(Path::new(".jj/repo")),
475      ".jj/repo should be ignored"
476    );
477    assert!(
478      WorkspaceGraph::should_ignore_path(Path::new("src/.jj/file")),
479      "nested .jj should be ignored"
480    );
481
482    assert!(
483      WorkspaceGraph::should_ignore_path(Path::new(".hg")),
484      ".hg should be ignored"
485    );
486    assert!(
487      WorkspaceGraph::should_ignore_path(Path::new(".svn")),
488      ".svn should be ignored"
489    );
490
491    // Build/dependency directories
492    assert!(
493      WorkspaceGraph::should_ignore_path(Path::new("target")),
494      "target should be ignored"
495    );
496    assert!(
497      WorkspaceGraph::should_ignore_path(Path::new("target/debug")),
498      "target/debug should be ignored"
499    );
500    assert!(
501      WorkspaceGraph::should_ignore_path(Path::new("node_modules")),
502      "node_modules should be ignored"
503    );
504
505    // Cache/temp directories
506    assert!(
507      WorkspaceGraph::should_ignore_path(Path::new(".cache")),
508      ".cache should be ignored"
509    );
510    assert!(
511      WorkspaceGraph::should_ignore_path(Path::new("tmp")),
512      "tmp should be ignored"
513    );
514    assert!(
515      WorkspaceGraph::should_ignore_path(Path::new(".tmp")),
516      ".tmp should be ignored"
517    );
518
519    // Normal paths should NOT be ignored
520    assert!(
521      !WorkspaceGraph::should_ignore_path(Path::new("src")),
522      "src should not be ignored"
523    );
524    assert!(
525      !WorkspaceGraph::should_ignore_path(Path::new("src/main.rs")),
526      "src/main.rs should not be ignored"
527    );
528    assert!(
529      !WorkspaceGraph::should_ignore_path(Path::new("Cargo.toml")),
530      "Cargo.toml should not be ignored"
531    );
532    assert!(
533      !WorkspaceGraph::should_ignore_path(Path::new("crates/foo/src/lib.rs")),
534      "crate files should not be ignored"
535    );
536    assert!(
537      !WorkspaceGraph::should_ignore_path(Path::new("README.md")),
538      "README.md should not be ignored"
539    );
540  }
541}