Skip to main content

cargo_rail/graph/
core.rs

1//! Workspace dependency graph built from cargo_metadata + petgraph
2//!
3//! # Design Philosophy
4//!
5//! We use `cargo_metadata` + `petgraph` directly instead of guppy because:
6//! - **No simulation needed**: We need workspace membership, dep edges, reverse deps,
7//!   and "what's affected" - all available in cargo_metadata already
8//! - **Your domain, your types**: Rail's concepts (WorkspaceGraph, affected analysis)
9//!   should be first-class, not wrappers around guppy
10//! - **Minimal cognitive tax**: Single engineer, opinionated tool - every abstraction
11//!   layer must earn its keep
12//!
13//! ## Graph Structure
14//!
15//! - **Directed Graph**: `A → B` means "A depends on B"
16//! - **Nodes**: Packages (workspace members + dependencies)
17//! - **Edges**: Dependency relationships (normal/dev/build)
18//! - **Index**: Fast lookups by crate name / package ID
19//! - **Algorithms**: Shortest paths, reachability, transitive closure
20//! - **Path cache**: File → owning crate mapping (lazy, interior mutability)
21
22use crate::error::{RailError, RailResult};
23use cargo_metadata::DependencyKind;
24use petgraph::Direction;
25use petgraph::algo::toposort;
26use petgraph::graph::{DiGraph, NodeIndex};
27use std::collections::{HashMap, HashSet};
28use std::path::{Path, PathBuf};
29use std::sync::RwLock;
30
31/// A package node in the dependency graph.
32#[derive(Debug, Clone)]
33pub struct PackageNode {
34  /// Package name
35  pub name: String,
36  /// Path to Cargo.toml
37  pub manifest_path: PathBuf,
38  /// Whether this is a workspace member
39  pub is_workspace_member: bool,
40}
41
42/// Workspace dependency graph.
43///
44/// Built from cargo_metadata, using petgraph for efficient traversals.
45pub struct WorkspaceGraph {
46  /// The dependency graph (petgraph DiGraph)
47  /// Nodes: PackageNode
48  /// Edges: DependencyKind (Normal, Dev, Build)
49  graph: DiGraph<PackageNode, DependencyKind>,
50
51  /// Index: package name → node index
52  name_to_node: HashMap<String, NodeIndex>,
53
54  /// Workspace members only (subset of graph nodes) - for O(1) membership checks
55  workspace_members: HashSet<String>,
56
57  /// Pre-sorted workspace member names - avoids repeated sort on each call
58  sorted_members: Vec<String>,
59
60  /// Workspace root directory (for converting absolute paths to relative)
61  workspace_root: PathBuf,
62
63  /// Path cache: workspace-relative directory → owning crate name
64  /// Built eagerly during graph construction (saves 10-50ms on first file lookup)
65  /// Uses RwLock instead of RefCell for Send/Sync compatibility
66  /// Stores workspace-relative paths to support deleted files (no canonicalize needed)
67  path_cache: RwLock<Option<HashMap<PathBuf, String>>>,
68}
69
70impl WorkspaceGraph {
71  /// Load workspace graph from cargo metadata.
72  ///
73  /// # Performance
74  /// - Graph construction: 10-50ms
75  /// - Path cache: built eagerly (10-50ms)
76  /// - **Does not reload metadata** (use existing from CargoState)
77  pub fn from_metadata(metadata: &cargo_metadata::Metadata) -> RailResult<Self> {
78    // Build petgraph
79    let mut graph = DiGraph::new();
80    let mut name_to_node = HashMap::new();
81    let mut id_to_node = HashMap::new();
82    let mut workspace_members = HashSet::new();
83
84    // Get workspace member IDs
85    let workspace_pkg_ids: HashSet<_> = metadata.workspace_packages().iter().map(|pkg| pkg.id.clone()).collect();
86
87    // Add all packages as nodes (workspace + dependencies)
88    for package in &metadata.packages {
89      let crate_name = package.name.as_ref().to_string();
90
91      let node = PackageNode {
92        name: crate_name.clone(),
93        manifest_path: package.manifest_path.clone().into_std_path_buf(),
94        is_workspace_member: workspace_pkg_ids.contains(&package.id),
95      };
96
97      let node_idx = graph.add_node(node);
98      name_to_node.insert(package.name.as_ref().to_string(), node_idx);
99      id_to_node.insert(package.id.clone(), node_idx);
100
101      if workspace_pkg_ids.contains(&package.id) {
102        workspace_members.insert(package.name.as_ref().to_string());
103      }
104    }
105
106    // Add dependency edges
107    for package in &metadata.packages {
108      let from_idx = id_to_node[&package.id];
109
110      for dep in &package.dependencies {
111        // Find the resolved dependency
112        if let Some(to_idx) = name_to_node.get(dep.name.as_str()) {
113          graph.add_edge(from_idx, *to_idx, dep.kind);
114        }
115      }
116    }
117
118    // Pre-sort workspace members once at construction
119    let mut sorted_members: Vec<String> = workspace_members.iter().cloned().collect();
120    sorted_members.sort();
121
122    // Store workspace root for path normalization
123    let workspace_root = metadata.workspace_root.clone().into_std_path_buf();
124
125    let graph = Self {
126      graph,
127      name_to_node,
128      workspace_members,
129      sorted_members,
130      workspace_root,
131      path_cache: RwLock::new(None),
132    };
133
134    // Build path cache eagerly instead of lazily (saves 10-50ms on first file lookup)
135    graph.build_path_cache();
136
137    Ok(graph)
138  }
139
140  /// Get all workspace member crate names (pre-sorted).
141  pub fn workspace_members(&self) -> &[String] {
142    &self.sorted_members
143  }
144
145  /// Get transitive reverse dependencies (all workspace crates that depend on this one).
146  ///
147  /// Uses petgraph DFS for efficient traversal.
148  ///
149  /// # Performance
150  /// O(V + E) where V = vertices, E = edges. Typically <10ms for <100 crates.
151  pub fn transitive_dependents(&self, crate_name: &str) -> RailResult<Vec<String>> {
152    let start_node = self.find_node(crate_name)?;
153
154    // DFS in reverse direction (incoming edges)
155    let mut visited = HashSet::new();
156    let mut stack = vec![start_node];
157    let mut dependents = HashSet::new();
158
159    while let Some(node_idx) = stack.pop() {
160      if !visited.insert(node_idx) {
161        continue;
162      }
163
164      // Add all incoming neighbors (things that depend on this)
165      for neighbor_idx in self.graph.neighbors_directed(node_idx, Direction::Incoming) {
166        let neighbor = &self.graph[neighbor_idx];
167
168        // Only include workspace members
169        if neighbor.is_workspace_member && neighbor_idx != start_node {
170          dependents.insert(neighbor.name.clone());
171        }
172
173        stack.push(neighbor_idx);
174      }
175    }
176
177    let mut result: Vec<_> = dependents.into_iter().collect();
178    result.sort();
179    Ok(result)
180  }
181
182  /// Get transitive reverse dependencies for multiple crates in a single traversal.
183  ///
184  /// This is more efficient than calling `transitive_dependents()` multiple times
185  /// when you have many direct crates, as it does a single O(V+E) traversal instead
186  /// of O(N × (V+E)) where N is the number of crates.
187  ///
188  /// # Performance
189  /// O(V + E) regardless of input set size. Significantly faster than N separate
190  /// traversals for large input sets.
191  pub fn transitive_dependents_of_set(&self, crate_names: &HashSet<String>) -> RailResult<HashSet<String>> {
192    if crate_names.is_empty() {
193      return Ok(HashSet::new());
194    }
195
196    // Find all start nodes
197    let start_nodes: Vec<NodeIndex> = crate_names
198      .iter()
199      .filter_map(|name| self.name_to_node.get(name).copied())
200      .collect();
201
202    if start_nodes.is_empty() {
203      return Ok(HashSet::new());
204    }
205
206    // Single BFS/DFS from all start nodes
207    let mut visited = HashSet::new();
208    let mut stack = start_nodes;
209    let mut dependents = HashSet::new();
210
211    // Pre-compute start node indices for fast lookup
212    let start_node_set: HashSet<NodeIndex> = stack.iter().copied().collect();
213
214    while let Some(node_idx) = stack.pop() {
215      if !visited.insert(node_idx) {
216        continue;
217      }
218
219      // Add all incoming neighbors (things that depend on this)
220      for neighbor_idx in self.graph.neighbors_directed(node_idx, Direction::Incoming) {
221        let neighbor = &self.graph[neighbor_idx];
222
223        // Only include workspace members that are not in the original set
224        if neighbor.is_workspace_member && !start_node_set.contains(&neighbor_idx) {
225          dependents.insert(neighbor.name.clone());
226        }
227
228        stack.push(neighbor_idx);
229      }
230    }
231
232    Ok(dependents)
233  }
234
235  /// Get workspace members in dependency order (dependencies first, dependents last).
236  ///
237  /// Returns crates in the order they should be published: a crate's dependencies
238  /// are always published before the crate itself.
239  ///
240  /// Uses topological sort on the dependency graph to ensure correct ordering.
241  ///
242  /// # Errors
243  /// Returns error if circular dependencies are detected (should never happen with Cargo).
244  ///
245  /// # Performance
246  /// O(V + E) where V = vertices, E = edges. Typically <10ms for <100 crates.
247  pub fn publish_order(&self) -> RailResult<Vec<String>> {
248    // Build a subgraph with only workspace members
249    // This is critical: external dependencies can have cycles (e.g., serde/serde_derive in dev deps),
250    // but workspace members should never have cycles (Cargo enforces this)
251    let mut subgraph = DiGraph::<&PackageNode, DependencyKind>::new();
252    let mut name_to_subgraph_idx = HashMap::new();
253
254    // Add only workspace member nodes
255    for (name, &idx) in &self.name_to_node {
256      let node = &self.graph[idx];
257      if node.is_workspace_member {
258        let subgraph_idx = subgraph.add_node(node);
259        name_to_subgraph_idx.insert(name.clone(), subgraph_idx);
260      }
261    }
262
263    // Add edges between workspace members only
264    // IMPORTANT: Skip dev-dependencies as they don't affect publish order
265    // (dev-deps can have cycles, including self-references for feature-gated test utils)
266    for (from_name, &from_subgraph_idx) in &name_to_subgraph_idx {
267      let from_graph_idx = self.name_to_node[from_name];
268
269      // Check all outgoing edges from this workspace member
270      for neighbor_graph_idx in self.graph.neighbors_directed(from_graph_idx, Direction::Outgoing) {
271        let neighbor_node = &self.graph[neighbor_graph_idx];
272
273        // Only add edge if the neighbor is also a workspace member
274        if neighbor_node.is_workspace_member
275          && let Some(&to_subgraph_idx) = name_to_subgraph_idx.get(&neighbor_node.name)
276          && let Some(edge) = self.graph.find_edge(from_graph_idx, neighbor_graph_idx)
277          && let Some(&kind) = self.graph.edge_weight(edge)
278        {
279          // Skip dev-dependencies - they don't affect publish order and can have cycles
280          // Also skip self-references (crate depending on itself for test features)
281          if kind != DependencyKind::Development && from_name != &neighbor_node.name {
282            subgraph.add_edge(from_subgraph_idx, to_subgraph_idx, kind);
283          }
284        }
285      }
286    }
287
288    // Now run toposort on the workspace-only subgraph
289    let sorted = toposort(&subgraph, None).map_err(|cycle| {
290      let node = subgraph[cycle.node_id()];
291      RailError::message(format!(
292        "Circular dependency detected involving workspace crate: '{}'. This should not happen in a valid Cargo workspace.",
293        node.name
294      ))
295    })?;
296
297    // Collect names in dependency order
298    let result: Vec<String> = sorted.into_iter().map(|idx| subgraph[idx].name.clone()).collect();
299
300    Ok(result)
301  }
302
303  /// Map a file path to its owning crate.
304  ///
305  /// O(1) lookups using pre-built path cache.
306  /// Filters out VCS directories (.git, .jj) and other non-source paths.
307  ///
308  /// Accepts both:
309  /// - Workspace-relative paths (e.g., "crates/foo/src/lib.rs")
310  /// - Absolute paths (will be converted to workspace-relative)
311  ///
312  /// Works for deleted files - does not require the file to exist on disk.
313  pub fn file_to_crate(&self, file_path: &Path) -> Option<String> {
314    // Filter out VCS directories (git, jj, etc.)
315    if Self::should_ignore_path(file_path) {
316      return None;
317    }
318
319    // Normalize to workspace-relative path
320    let relative_path = self.to_workspace_relative(file_path)?;
321
322    // If the lock is poisoned (another thread panicked), return None gracefully
323    let cache = self.path_cache.read().ok()?;
324    let cache_ref = cache.as_ref()?;
325
326    // Walk up directory tree looking for a crate root
327    let mut current = relative_path.as_path();
328
329    // Check the file's directory first
330    if let Some(parent) = current.parent() {
331      if let Some(crate_name) = cache_ref.get(parent) {
332        return Some(crate_name.clone());
333      }
334      current = parent;
335    }
336
337    // Walk up looking for parent directories
338    while let Some(parent) = current.parent() {
339      if let Some(crate_name) = cache_ref.get(parent) {
340        return Some(crate_name.clone());
341      }
342      current = parent;
343    }
344
345    // Check root directory (for root-level crate)
346    cache_ref.get(Path::new("")).cloned()
347  }
348
349  /// Convert a path to workspace-relative.
350  ///
351  /// Handles:
352  /// - Already relative paths: returned as-is
353  /// - Absolute paths: strips workspace root prefix
354  /// - Paths that don't exist: works without filesystem access
355  fn to_workspace_relative(&self, path: &Path) -> Option<PathBuf> {
356    if path.is_absolute() {
357      // Try to strip workspace root prefix
358      path.strip_prefix(&self.workspace_root).ok().map(PathBuf::from)
359    } else {
360      // Already relative - assume it's workspace-relative
361      Some(path.to_path_buf())
362    }
363  }
364
365  /// Map multiple files to owning crates.
366  pub fn files_to_crates(&self, file_paths: &[impl AsRef<Path>]) -> HashSet<String> {
367    file_paths
368      .iter()
369      .filter_map(|p| self.file_to_crate(p.as_ref()))
370      .collect()
371  }
372
373  /// Find node index by crate name.
374  fn find_node(&self, crate_name: &str) -> RailResult<NodeIndex> {
375    self.name_to_node.get(crate_name).copied().ok_or_else(|| {
376      RailError::message(format!(
377        "Crate '{}' not found. Available workspace crates: {}",
378        crate_name,
379        self.workspace_members().join(", ")
380      ))
381    })
382  }
383
384  /// Check if a path should be ignored (VCS directories, build artifacts, etc.)
385  fn should_ignore_path(path: &Path) -> bool {
386    path.components().any(|component| {
387      if let std::path::Component::Normal(name) = component {
388        let name_str = name.to_string_lossy();
389        // Ignore VCS directories
390        matches!(
391          name_str.as_ref(),
392          ".git" | ".jj" | ".hg" | ".svn" |
393          // Ignore build/target directories
394          "target" | "node_modules" |
395          // Ignore common temp/cache dirs
396          ".cache" | "tmp" | ".tmp"
397        )
398      } else {
399        false
400      }
401    })
402  }
403
404  /// Build path-to-crate mapping cache.
405  ///
406  /// Maps each workspace crate's root directory (workspace-relative) to its name.
407  /// Uses workspace-relative paths to support deleted files (no canonicalize needed).
408  fn build_path_cache(&self) {
409    let mut cache = HashMap::new();
410
411    for crate_name in &self.workspace_members {
412      if let Some(node_idx) = self.name_to_node.get(crate_name) {
413        let node = &self.graph[*node_idx];
414
415        // Get crate root (parent of Cargo.toml) as workspace-relative path
416        if let Some(crate_root) = node.manifest_path.parent() {
417          // Convert to workspace-relative path
418          let relative_root = if crate_root.is_absolute() {
419            crate_root
420              .strip_prefix(&self.workspace_root)
421              .map(PathBuf::from)
422              .unwrap_or_else(|_| crate_root.to_path_buf())
423          } else {
424            crate_root.to_path_buf()
425          };
426
427          cache.insert(relative_root, crate_name.clone());
428        }
429      }
430    }
431
432    // If the lock is poisoned (another thread panicked), skip cache update
433    // The cache will be rebuilt on next access attempt
434    if let Ok(mut guard) = self.path_cache.write() {
435      *guard = Some(cache);
436    }
437  }
438}
439
440#[cfg(test)]
441mod tests {
442  use super::*;
443
444  #[test]
445  fn test_should_ignore_path() {
446    // VCS directories
447    assert!(
448      WorkspaceGraph::should_ignore_path(Path::new(".git")),
449      ".git should be ignored"
450    );
451    assert!(
452      WorkspaceGraph::should_ignore_path(Path::new(".git/objects")),
453      ".git/objects should be ignored"
454    );
455    assert!(
456      WorkspaceGraph::should_ignore_path(Path::new("foo/.git/bar")),
457      "nested .git should be ignored"
458    );
459
460    assert!(
461      WorkspaceGraph::should_ignore_path(Path::new(".jj")),
462      ".jj should be ignored"
463    );
464    assert!(
465      WorkspaceGraph::should_ignore_path(Path::new(".jj/repo")),
466      ".jj/repo should be ignored"
467    );
468    assert!(
469      WorkspaceGraph::should_ignore_path(Path::new("src/.jj/file")),
470      "nested .jj should be ignored"
471    );
472
473    assert!(
474      WorkspaceGraph::should_ignore_path(Path::new(".hg")),
475      ".hg should be ignored"
476    );
477    assert!(
478      WorkspaceGraph::should_ignore_path(Path::new(".svn")),
479      ".svn should be ignored"
480    );
481
482    // Build/dependency directories
483    assert!(
484      WorkspaceGraph::should_ignore_path(Path::new("target")),
485      "target should be ignored"
486    );
487    assert!(
488      WorkspaceGraph::should_ignore_path(Path::new("target/debug")),
489      "target/debug should be ignored"
490    );
491    assert!(
492      WorkspaceGraph::should_ignore_path(Path::new("node_modules")),
493      "node_modules should be ignored"
494    );
495
496    // Cache/temp directories
497    assert!(
498      WorkspaceGraph::should_ignore_path(Path::new(".cache")),
499      ".cache should be ignored"
500    );
501    assert!(
502      WorkspaceGraph::should_ignore_path(Path::new("tmp")),
503      "tmp should be ignored"
504    );
505    assert!(
506      WorkspaceGraph::should_ignore_path(Path::new(".tmp")),
507      ".tmp should be ignored"
508    );
509
510    // Normal paths should NOT be ignored
511    assert!(
512      !WorkspaceGraph::should_ignore_path(Path::new("src")),
513      "src should not be ignored"
514    );
515    assert!(
516      !WorkspaceGraph::should_ignore_path(Path::new("src/main.rs")),
517      "src/main.rs should not be ignored"
518    );
519    assert!(
520      !WorkspaceGraph::should_ignore_path(Path::new("Cargo.toml")),
521      "Cargo.toml should not be ignored"
522    );
523    assert!(
524      !WorkspaceGraph::should_ignore_path(Path::new("crates/foo/src/lib.rs")),
525      "crate files should not be ignored"
526    );
527    assert!(
528      !WorkspaceGraph::should_ignore_path(Path::new("README.md")),
529      "README.md should not be ignored"
530    );
531  }
532}