Skip to main content

batuta/stack/
graph.rs

1//! Dependency Graph Analysis
2//!
3//! Uses trueno-graph for cycle detection and topological sorting.
4//! Edge metadata stored separately since trueno-graph only stores f32 weights.
5//!
6//! When the `trueno-graph` feature is disabled, a lightweight fallback
7//! implementation is provided using only the standard library.
8
9use crate::stack::is_paiml_crate;
10use crate::stack::types::*;
11use anyhow::{anyhow, Result};
12use std::collections::HashMap;
13use std::path::Path;
14#[cfg(feature = "trueno-graph")]
15use trueno_graph::{is_cyclic, toposort, CsrGraph, NodeId};
16
17// ============================================================================
18// Fallback graph primitives when trueno-graph is not available
19// ============================================================================
20
21#[cfg(not(feature = "trueno-graph"))]
22mod fallback {
23    use std::collections::{HashMap, HashSet, VecDeque};
24
25    /// Lightweight node identifier (mirrors trueno_graph::NodeId)
26    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
27    pub struct NodeId(pub u32);
28
29    /// Minimal adjacency-list graph (replaces CsrGraph)
30    #[derive(Debug, Clone)]
31    pub struct CsrGraph {
32        outgoing: HashMap<u32, Vec<u32>>,
33        incoming: HashMap<u32, Vec<u32>>,
34        names: HashMap<u32, String>,
35    }
36
37    impl CsrGraph {
38        pub fn new() -> Self {
39            Self { outgoing: HashMap::new(), incoming: HashMap::new(), names: HashMap::new() }
40        }
41
42        pub fn from_edge_list(edges: &[(NodeId, NodeId, f32)]) -> Result<Self, &'static str> {
43            let mut g = Self::new();
44            for &(from, to, _) in edges {
45                let _ = g.add_edge(from, to, 1.0);
46            }
47            Ok(g)
48        }
49
50        pub fn set_node_name(&mut self, id: NodeId, name: String) {
51            self.names.insert(id.0, name);
52        }
53
54        pub fn add_edge(
55            &mut self,
56            from: NodeId,
57            to: NodeId,
58            _weight: f32,
59        ) -> Result<(), &'static str> {
60            self.outgoing.entry(from.0).or_default().push(to.0);
61            self.incoming.entry(to.0).or_default().push(from.0);
62            Ok(())
63        }
64
65        pub fn outgoing_neighbors(&self, id: NodeId) -> Result<&[u32], &'static str> {
66            Ok(self.outgoing.get(&id.0).map(|v| v.as_slice()).unwrap_or(&[]))
67        }
68
69        pub fn incoming_neighbors(&self, id: NodeId) -> Result<&[u32], &'static str> {
70            Ok(self.incoming.get(&id.0).map(|v| v.as_slice()).unwrap_or(&[]))
71        }
72
73        fn all_nodes(&self) -> HashSet<u32> {
74            let mut nodes = HashSet::new();
75            for (&k, vs) in &self.outgoing {
76                nodes.insert(k);
77                for &v in vs {
78                    nodes.insert(v);
79                }
80            }
81            for (&k, vs) in &self.incoming {
82                nodes.insert(k);
83                for &v in vs {
84                    nodes.insert(v);
85                }
86            }
87            nodes
88        }
89    }
90
91    /// Detect cycles via DFS
92    pub fn is_cyclic(graph: &CsrGraph) -> bool {
93        let nodes = graph.all_nodes();
94        let mut visited = HashSet::new();
95        let mut on_stack = HashSet::new();
96
97        for &node in &nodes {
98            if !visited.contains(&node) && dfs_cycle(graph, node, &mut visited, &mut on_stack) {
99                return true;
100            }
101        }
102        false
103    }
104
105    fn dfs_cycle(
106        graph: &CsrGraph,
107        node: u32,
108        visited: &mut HashSet<u32>,
109        on_stack: &mut HashSet<u32>,
110    ) -> bool {
111        visited.insert(node);
112        on_stack.insert(node);
113        if let Ok(neighbors) = graph.outgoing_neighbors(NodeId(node)) {
114            for &neighbor in neighbors {
115                if !visited.contains(&neighbor) {
116                    if dfs_cycle(graph, neighbor, visited, on_stack) {
117                        return true;
118                    }
119                } else if on_stack.contains(&neighbor) {
120                    return true;
121                }
122            }
123        }
124        on_stack.remove(&node);
125        false
126    }
127
128    /// Topological sort via Kahn's algorithm
129    pub fn toposort(graph: &CsrGraph) -> Result<Vec<NodeId>, &'static str> {
130        let nodes = graph.all_nodes();
131        let mut in_degree: HashMap<u32, usize> = HashMap::new();
132
133        for &node in &nodes {
134            in_degree.entry(node).or_insert(0);
135            if let Ok(neighbors) = graph.outgoing_neighbors(NodeId(node)) {
136                for &neighbor in neighbors {
137                    *in_degree.entry(neighbor).or_insert(0) += 1;
138                }
139            }
140        }
141
142        let mut queue: VecDeque<u32> =
143            in_degree.iter().filter(|(_, &deg)| deg == 0).map(|(&node, _)| node).collect();
144
145        let mut result = Vec::new();
146        while let Some(node) = queue.pop_front() {
147            result.push(NodeId(node));
148            if let Ok(neighbors) = graph.outgoing_neighbors(NodeId(node)) {
149                for &neighbor in neighbors {
150                    if let Some(deg) = in_degree.get_mut(&neighbor) {
151                        *deg -= 1;
152                        if *deg == 0 {
153                            queue.push_back(neighbor);
154                        }
155                    }
156                }
157            }
158        }
159
160        if result.len() != nodes.len() {
161            return Err("cycle detected");
162        }
163        Ok(result)
164    }
165}
166
167#[cfg(not(feature = "trueno-graph"))]
168use fallback::{is_cyclic, toposort, CsrGraph, NodeId};
169
170/// Dependency graph for the PAIML stack
171#[derive(Debug, Clone)]
172pub struct DependencyGraph {
173    /// The underlying trueno-graph CSR graph
174    graph: CsrGraph,
175
176    /// Map from crate name to node ID
177    name_to_id: HashMap<String, NodeId>,
178
179    /// Map from node ID to crate name
180    id_to_name: HashMap<NodeId, String>,
181
182    /// Edge metadata (not stored in trueno-graph which only has f32 weights)
183    edge_data: HashMap<(NodeId, NodeId), DependencyEdge>,
184
185    /// Crate information for each node
186    crate_info: HashMap<String, CrateInfo>,
187
188    /// Next node ID to assign
189    next_id: u32,
190}
191
192/// Edge data in the dependency graph
193#[derive(Debug, Clone)]
194pub struct DependencyEdge {
195    /// Version requirement
196    pub version_req: String,
197
198    /// Whether this is a path dependency
199    pub is_path: bool,
200
201    /// Dependency kind
202    pub kind: DependencyKind,
203}
204
205impl DependencyGraph {
206    /// Create a new empty dependency graph
207    pub fn new() -> Self {
208        Self {
209            graph: CsrGraph::new(),
210            name_to_id: HashMap::new(),
211            id_to_name: HashMap::new(),
212            edge_data: HashMap::new(),
213            crate_info: HashMap::new(),
214            next_id: 0,
215        }
216    }
217
218    /// Build graph from cargo metadata, falling back to binary detection
219    /// for non-Rust projects.
220    #[cfg(feature = "native")]
221    pub fn from_workspace(workspace_path: &Path) -> Result<Self> {
222        use cargo_metadata::MetadataCommand;
223        use std::collections::HashSet;
224
225        let cargo_toml = workspace_path.join("Cargo.toml");
226        if !cargo_toml.exists() {
227            return Self::from_installed_binaries();
228        }
229
230        let metadata = MetadataCommand::new()
231            .manifest_path(cargo_toml)
232            .exec()
233            .map_err(|e| anyhow!("Failed to read cargo metadata: {}", e))?;
234
235        let mut graph = Self::new();
236
237        // Collect workspace member package IDs so we only add edges from
238        // crates WE control.  Resolved transitive deps (e.g. trueno pulled
239        // in by aprender) may have optional reverse deps that create false
240        // cycles in the graph (GH-25).
241        let workspace_member_ids: HashSet<&cargo_metadata::PackageId> =
242            metadata.workspace_members.iter().collect();
243
244        // First pass: add all PAIML crates as nodes
245        for package in &metadata.packages {
246            if is_paiml_crate(&package.name) {
247                let version = package.version.clone();
248                let semver_version =
249                    semver::Version::new(version.major, version.minor, version.patch);
250
251                let crate_info = CrateInfo::new(
252                    &package.name,
253                    semver_version,
254                    package.manifest_path.clone().into_std_path_buf(),
255                );
256
257                graph.add_crate(crate_info);
258            }
259        }
260
261        // Second pass: add edges ONLY from workspace members.
262        // Non-workspace PAIML packages are external — their own deps
263        // (which may include optional reverse deps like trueno → aprender)
264        // must NOT be added, as they create false cycles (GH-25).
265        for package in &metadata.packages {
266            if !is_paiml_crate(&package.name) {
267                continue;
268            }
269            if !workspace_member_ids.contains(&package.id) {
270                continue;
271            }
272
273            for dep in &package.dependencies {
274                if is_paiml_crate(&dep.name) {
275                    let is_path = dep.path.is_some();
276                    let version_req = dep.req.to_string();
277
278                    let kind = match dep.kind {
279                        cargo_metadata::DependencyKind::Normal => DependencyKind::Normal,
280                        cargo_metadata::DependencyKind::Development => DependencyKind::Dev,
281                        cargo_metadata::DependencyKind::Build => DependencyKind::Build,
282                        _ => DependencyKind::Normal,
283                    };
284
285                    let edge = DependencyEdge { version_req, is_path, kind };
286
287                    // Add edge: package depends on dep
288                    graph.add_dependency(&package.name, &dep.name, edge);
289
290                    // Update crate info with dependency
291                    if let Some(info) = graph.crate_info.get_mut(&package.name) {
292                        let dep_info = if is_path {
293                            let path = dep
294                                .path
295                                .as_ref()
296                                .map(|p| p.clone().into_std_path_buf())
297                                .unwrap_or_default();
298                            DependencyInfo::path(&dep.name, path)
299                        } else {
300                            DependencyInfo::new(&dep.name, dep.req.to_string())
301                        };
302                        info.paiml_dependencies.push(dep_info);
303                    }
304                }
305            }
306        }
307
308        Ok(graph)
309    }
310
311    /// Build a graph from installed PAIML binaries when no Cargo.toml is present.
312    /// This allows `batuta stack status` to work in non-Rust projects that use
313    /// sovereign stack tools via CLI.
314    #[cfg(feature = "native")]
315    fn from_installed_binaries() -> Result<Self> {
316        use std::process::Command;
317
318        let mut graph = Self::new();
319
320        // Map binary names to crate names for common PAIML tools
321        let binary_crates: &[(&str, &str)] = &[
322            ("apr", "aprender"),
323            ("pv", "provable-contracts"),
324            ("forjar", "forjar"),
325            ("alimentar", "alimentar"),
326            ("batuta", "batuta"),
327            ("pmat", "pmat"),
328            ("bashrs", "bashrs"),
329            ("realizar", "realizar"),
330            ("entrenar", "entrenar"),
331            ("certeza", "certeza"),
332            ("ttop", "ttop"),
333            ("depyler", "depyler"),
334        ];
335
336        for (binary, crate_name) in binary_crates {
337            if let Ok(output) = Command::new("which").arg(binary).output() {
338                if output.status.success() {
339                    let bin_path = String::from_utf8_lossy(&output.stdout).trim().to_string();
340                    let version = get_binary_version(binary);
341                    let crate_info =
342                        CrateInfo::new(*crate_name, version, std::path::PathBuf::from(&bin_path));
343                    graph.add_crate(crate_info);
344                }
345            }
346        }
347
348        Ok(graph)
349    }
350
351    /// Add a crate to the graph
352    pub fn add_crate(&mut self, info: CrateInfo) {
353        let name = info.name.clone();
354        if !self.name_to_id.contains_key(&name) {
355            let id = NodeId(self.next_id);
356            self.next_id += 1;
357            self.name_to_id.insert(name.clone(), id);
358            self.id_to_name.insert(id, name.clone());
359            self.graph.set_node_name(id, name.clone());
360        }
361        self.crate_info.insert(name, info);
362    }
363
364    /// Ensure a node exists for a crate name
365    fn ensure_node(&mut self, name: &str) -> NodeId {
366        if let Some(&id) = self.name_to_id.get(name) {
367            id
368        } else {
369            let id = NodeId(self.next_id);
370            self.next_id += 1;
371            self.name_to_id.insert(name.to_string(), id);
372            self.id_to_name.insert(id, name.to_string());
373            self.graph.set_node_name(id, name.to_string());
374            id
375        }
376    }
377
378    /// Add a dependency edge between crates
379    pub fn add_dependency(&mut self, from: &str, to: &str, edge: DependencyEdge) {
380        let from_id = self.ensure_node(from);
381        let to_id = self.ensure_node(to);
382
383        // Store edge metadata
384        self.edge_data.insert((from_id, to_id), edge);
385
386        // Add edge to trueno-graph (weight = 1.0)
387        let _ = self.graph.add_edge(from_id, to_id, 1.0);
388    }
389
390    /// Build a release graph (excludes dev-dependencies)
391    fn build_release_graph(&self) -> CsrGraph {
392        let mut edges: Vec<(NodeId, NodeId, f32)> = Vec::new();
393
394        // Collect non-dev edges
395        for ((from_id, to_id), edge_data) in &self.edge_data {
396            if edge_data.kind != DependencyKind::Dev {
397                edges.push((*from_id, *to_id, 1.0));
398            }
399        }
400
401        CsrGraph::from_edge_list(&edges).unwrap_or_else(|_| CsrGraph::new())
402    }
403
404    /// Check if the graph has cycles (excluding dev-dependencies)
405    ///
406    /// Dev-dependencies are excluded because they don't create real dependency
407    /// cycles for release ordering. A crate can have a dev-dependency on another
408    /// crate that depends on it (common for integration testing).
409    pub fn has_cycles(&self) -> bool {
410        let release_graph = self.build_release_graph();
411        is_cyclic(&release_graph)
412    }
413
414    /// Get topological order for releases (dependencies first)
415    ///
416    /// Uses the release graph which excludes dev-dependencies to avoid
417    /// false cycle detection (fixes issue #13).
418    pub fn topological_order(&self) -> Result<Vec<String>> {
419        let release_graph = self.build_release_graph();
420
421        if is_cyclic(&release_graph) {
422            return Err(anyhow!("Circular dependency detected in the graph"));
423        }
424
425        let sorted =
426            toposort(&release_graph).map_err(|_| anyhow!("Failed to compute topological order"))?;
427
428        // Map NodeIds back to names and reverse
429        // Reverse because toposort gives dependents first, we want dependencies first
430        let names: Vec<String> = sorted
431            .iter()
432            .rev() // Dependencies should come before dependents
433            .filter_map(|id| self.id_to_name.get(id).cloned())
434            .collect();
435
436        Ok(names)
437    }
438
439    /// Get release order for a specific crate and its dependencies
440    pub fn release_order_for(&self, crate_name: &str) -> Result<Vec<String>> {
441        let full_order = self.topological_order()?;
442        let deps = self.all_dependencies(crate_name);
443
444        // Filter to only include the target crate and its dependencies
445        let mut order: Vec<String> = full_order
446            .into_iter()
447            .filter(|name| name == crate_name || deps.contains(name))
448            .collect();
449
450        // Ensure the target crate is at the end
451        if let Some(pos) = order.iter().position(|n| n == crate_name) {
452            order.remove(pos);
453            order.push(crate_name.to_string());
454        }
455
456        Ok(order)
457    }
458
459    /// Get all dependencies (transitive) for a crate
460    pub fn all_dependencies(&self, crate_name: &str) -> Vec<String> {
461        let mut deps = Vec::new();
462        let mut visited = std::collections::HashSet::new();
463
464        if let Some(&id) = self.name_to_id.get(crate_name) {
465            self.collect_dependencies(id, &mut deps, &mut visited);
466        }
467
468        deps
469    }
470
471    fn collect_dependencies(
472        &self,
473        id: NodeId,
474        deps: &mut Vec<String>,
475        visited: &mut std::collections::HashSet<NodeId>,
476    ) {
477        if visited.contains(&id) {
478            return;
479        }
480        visited.insert(id);
481
482        // Get outgoing neighbors from trueno-graph
483        if let Ok(neighbors) = self.graph.outgoing_neighbors(id) {
484            for &neighbor_id in neighbors {
485                let neighbor = NodeId(neighbor_id);
486                if let Some(name) = self.id_to_name.get(&neighbor) {
487                    if !deps.contains(name) {
488                        deps.push(name.clone());
489                    }
490                    self.collect_dependencies(neighbor, deps, visited);
491                }
492            }
493        }
494    }
495
496    /// Get crates that depend on this crate (reverse dependencies)
497    pub fn dependents(&self, crate_name: &str) -> Vec<String> {
498        let mut dependents = Vec::new();
499
500        if let Some(&id) = self.name_to_id.get(crate_name) {
501            if let Ok(neighbors) = self.graph.incoming_neighbors(id) {
502                for &neighbor_id in neighbors {
503                    if let Some(name) = self.id_to_name.get(&NodeId(neighbor_id)) {
504                        dependents.push(name.clone());
505                    }
506                }
507            }
508        }
509
510        dependents
511    }
512
513    /// Get all path dependencies in the graph
514    pub fn find_path_dependencies(&self) -> Vec<PathDependencyIssue> {
515        let mut issues = Vec::new();
516
517        for ((from_id, to_id), edge_data) in &self.edge_data {
518            if edge_data.is_path {
519                if let (Some(from), Some(to)) =
520                    (self.id_to_name.get(from_id), self.id_to_name.get(to_id))
521                {
522                    issues.push(PathDependencyIssue {
523                        crate_name: from.clone(),
524                        dependency: to.clone(),
525                        current: format!("path = \"../{}\"", to),
526                        recommended: None, // Will be filled in by checker
527                    });
528                }
529            }
530        }
531
532        issues
533    }
534
535    /// Detect version conflicts for external dependencies
536    pub fn detect_conflicts(&self) -> Vec<VersionConflict> {
537        let mut dep_versions: HashMap<String, Vec<ConflictUsage>> = HashMap::new();
538
539        // Collect all external dependency versions
540        for (crate_name, info) in &self.crate_info {
541            for dep in &info.external_dependencies {
542                dep_versions.entry(dep.name.clone()).or_default().push(ConflictUsage {
543                    crate_name: crate_name.clone(),
544                    version_req: dep.version_req.clone(),
545                });
546            }
547        }
548
549        // Find conflicts (different versions of same dependency)
550        let mut conflicts = Vec::new();
551        for (dep_name, usages) in dep_versions {
552            if usages.len() > 1 {
553                let versions: std::collections::HashSet<_> =
554                    usages.iter().map(|u| &u.version_req).collect();
555
556                if versions.len() > 1 {
557                    conflicts.push(VersionConflict {
558                        dependency: dep_name,
559                        usages,
560                        recommendation: None,
561                    });
562                }
563            }
564        }
565
566        conflicts
567    }
568
569    /// Get crate info by name
570    pub fn get_crate(&self, name: &str) -> Option<&CrateInfo> {
571        self.crate_info.get(name)
572    }
573
574    /// Get mutable crate info by name
575    pub fn get_crate_mut(&mut self, name: &str) -> Option<&mut CrateInfo> {
576        self.crate_info.get_mut(name)
577    }
578
579    /// Get all crates in the graph
580    pub fn all_crates(&self) -> impl Iterator<Item = &CrateInfo> {
581        self.crate_info.values()
582    }
583
584    /// Get number of crates in the graph
585    pub fn crate_count(&self) -> usize {
586        self.crate_info.len()
587    }
588
589    /// Check if a node exists (for compatibility with tests)
590    pub(crate) fn node_indices_contains(&self, name: &str) -> bool {
591        self.name_to_id.contains_key(name)
592    }
593}
594
595impl Default for DependencyGraph {
596    fn default() -> Self {
597        Self::new()
598    }
599}
600
601/// Path dependency issue for reporting
602#[derive(Debug, Clone)]
603pub struct PathDependencyIssue {
604    /// Crate that has the path dependency
605    pub crate_name: String,
606
607    /// Dependency that is path-based
608    pub dependency: String,
609
610    /// Current specification
611    pub current: String,
612
613    /// Recommended crates.io version
614    pub recommended: Option<String>,
615}
616
617/// Extract a semver version from a binary's --version output.
618#[cfg(feature = "native")]
619fn get_binary_version(binary: &str) -> semver::Version {
620    let output = std::process::Command::new(binary).arg("--version").output().ok();
621
622    let version_str =
623        output.as_ref().map(|o| String::from_utf8_lossy(&o.stdout).to_string()).unwrap_or_default();
624
625    // Parse version from output like "batuta 0.7.2" or "bashrs v6.65.0"
626    let version = version_str
627        .split_whitespace()
628        .find_map(|word| {
629            let w = word.trim_start_matches('v');
630            semver::Version::parse(w).ok()
631        })
632        .unwrap_or_else(|| semver::Version::new(0, 0, 0));
633
634    version
635}
636
637#[cfg(test)]
638#[path = "graph_tests.rs"]
639mod tests;