pugio_lib/graph.rs
1use std::collections::{BTreeMap, HashMap, VecDeque};
2
3use petgraph::{
4 dot::{Config, Dot},
5 graph::NodeIndex,
6 prelude::StableGraph,
7 stable_graph::EdgeReference,
8 visit::{Bfs, Dfs, EdgeRef, Topo, Walker},
9};
10
11use crate::{
12 cargo::{get_dep_graph, get_size_map},
13 coloring::{Gradient, Values},
14 template::Templating,
15};
16
17/// Represents a dependency directed acyclic graph (DAG) with size information, where each node
18/// represents a crate, and each directed edge represents a binary relation of dependency of the
19/// source node on the target node.
20///
21/// It also keeps the size information of each crate as parsed from `cargo-bloat` output in a
22/// map, which can be accessed using the [`size`](Self::size) method for a given node index.
23///
24/// The node indices can be iterated using the [`node_indices`](Self::node_indices) method, though
25/// there is **no** guarantee of the order of iteration. Use [`dfs`](Self::dfs), [`bfs`](Self::bfs),
26/// or [`topo`](Self::topo) if the order of iteration is important, e.g. when a crate must be
27/// traversed before its dependencies.
28///
29/// While [Graph] can be mutated, it is deliberately limited to only
30/// [`change_root`](Self::change_root), [`remove_deep_deps`](Self::remove_deep_deps) and
31/// [`remove_indices`](Self::remove_indices), as the graph structure should only be reduced and not
32/// expanded. In addition, any remaining non-reachable node from the root after these operations
33/// will also be removed.
34///
35/// # Examples
36///
37///
38/// The following example demonstrates a common pattern that will fail to compile without `collect`
39/// as otherwise it borrows from `graph` immutably:
40///
41/// ```
42/// # use pugio_lib::graph::Graph;
43/// fn remove_z(graph: &mut Graph) {
44/// let iter = graph.node_indices().filter(|i| {
45/// graph.node_weight(*i).short().starts_with('z')
46/// }).collect::<Vec<_>>().into_iter();
47///
48/// graph.remove_indices(iter);
49/// }
50/// ```
51///
52/// Use ordered tranversals when required:
53///
54/// ```
55/// # use pugio_lib::graph::Graph;
56/// fn dep_counts(graph: &Graph) -> Vec<usize> {
57/// let mut values = vec![0; graph.node_capacity()];
58///
59/// let nodes: Vec<usize> = graph.topo().collect();
60///
61/// for node in nodes.iter().rev() {
62/// for target in graph.neighbors(*node, true) {
63/// values[*node] += values[target] + 1;
64/// }
65/// }
66///
67/// values
68/// }
69/// ```
70
71#[derive(Debug)]
72pub struct Graph {
73 inner: StableGraph<NodeWeight, EdgeWeight>,
74 size_map: HashMap<String, usize>,
75 std: Option<NodeIndex>,
76 root: NodeIndex,
77}
78
79impl Graph {
80 /// Create a new graph from the given `cargo-tree` and `cargo-bloat` outputs, with optional `std`
81 /// standalone node.
82 ///
83 /// The `bin` parameter is needed for accurate size accounting if the binary name is different
84 /// from its crate name.
85 ///
86 /// * `cargo_tree_output` should be the output of
87 /// `cargo tree --edges=no-build,no-proc-macro,no-dev,features --prefix=depth --color=never ...`
88 /// * `cargo_bloat_output` should be the output of
89 /// `cargo bloat -n0 --message-format=json --crates ...`
90 ///
91 /// # Panics
92 /// May panic if the cargo outputs are malformed.
93 pub fn new(
94 cargo_tree_output: &str,
95 cargo_bloat_output: &str,
96 std: bool,
97 bin: Option<&str>,
98 ) -> Self {
99 let mut inner = get_dep_graph(cargo_tree_output);
100 let mut size_map = get_size_map(cargo_bloat_output);
101 if let Some(bin) = bin {
102 let size = size_map.get(bin).copied().unwrap_or_default();
103 let root_name = inner.node_weight(NodeIndex::new(0)).unwrap().short();
104 *size_map.get_mut(root_name).unwrap() += size;
105 }
106 let std = std.then(|| {
107 let weight = NodeWeight {
108 name: "std ".to_string(),
109 short_end: 3,
110 features: BTreeMap::new(),
111 };
112 inner.add_node(weight)
113 });
114 inner.shrink_to_fit();
115 let mut graph = Graph {
116 inner,
117 size_map,
118 std,
119 root: NodeIndex::new(0),
120 };
121 graph.normalize_sizes();
122 graph
123 }
124
125 /// Get the index of the `std` standalone node, if it exists.
126 pub fn std(&self) -> Option<usize> {
127 self.std.map(|i| i.index())
128 }
129
130 /// Get the index of the root node.
131 pub fn root(&self) -> usize {
132 self.root.index()
133 }
134
135 /// Get the number of nodes currently in the graph.
136 ///
137 /// Node indices may be greater than this value if nodes have been removed.
138 /// Use [`node_capacity`](Self::node_capacity) instead for allocation.
139 pub fn node_count(&self) -> usize {
140 self.inner.node_count()
141 }
142
143 /// Get the capacity of nodes in the graph.
144 ///
145 /// This is the upper bound of node indices.
146 pub fn node_capacity(&self) -> usize {
147 self.inner.capacity().0
148 }
149
150 /// Get an iterator over the node indices of the graph.
151 pub fn node_indices(&self) -> impl Iterator<Item = usize> {
152 self.inner.node_indices().map(|i| i.index())
153 }
154
155 /// Get the weight of the node at the given index.
156 ///
157 /// # Panics
158 /// Panics if the node does not exist in the graph.
159 pub fn node_weight(&self, index: usize) -> &NodeWeight {
160 self.inner.node_weight(NodeIndex::new(index)).unwrap()
161 }
162
163 /// Get the weight of the edge at the given index.
164 ///
165 /// # Panics
166 /// Panics if the source or target node, or the directed edge between them, does not exist in the graph.
167 ///
168 /// ```
169 /// # use pugio_lib::graph::{EdgeWeight, Graph};
170 /// fn edge_iter(graph: &Graph) -> impl Iterator<Item = &EdgeWeight> {
171 /// graph.node_indices().flat_map(move |i| {
172 /// graph.neighbors(i, true).map(move |j| graph.edge_weight(i, j))
173 /// })
174 /// }
175 /// ```
176 pub fn edge_weight(&self, source: usize, target: usize) -> &EdgeWeight {
177 self.inner
178 .edge_weight(
179 self.inner
180 .find_edge(NodeIndex::new(source), NodeIndex::new(target))
181 .unwrap(),
182 )
183 .unwrap()
184 }
185
186 /// Get the size of the node at the given index.
187 ///
188 /// Returns `None` if its name is not in the size map.
189 ///
190 /// # Panics
191 /// Panics if the node does not exist in the graph.
192 pub fn size(&self, index: usize) -> Option<usize> {
193 let short_name = self
194 .inner
195 .node_weight(NodeIndex::new(index))
196 .unwrap()
197 .short();
198 self.size_map.get(short_name).copied()
199 }
200
201 fn normalize_sizes(&mut self) {
202 let inner = &self.inner;
203
204 let mut counts = HashMap::with_capacity(inner.node_count());
205 for node in inner.node_weights() {
206 *counts.entry(node.short()).or_default() += 1;
207 }
208
209 for (name, size) in self.size_map.iter_mut() {
210 let count = counts.get(name.as_str()).copied().unwrap_or(1);
211 *size /= count;
212 }
213 }
214
215 fn node_classes(&self, is_dir_down: bool) -> Vec<Vec<usize>> {
216 let graph = &self.inner;
217
218 let mut classes = vec![Vec::new(); graph.capacity().0];
219 let nodes = Topo::new(&graph).iter(&graph).collect::<Vec<_>>();
220
221 if is_dir_down {
222 for node in nodes.iter() {
223 classes[node.index()].push(node.index());
224 for target in graph.neighbors(*node) {
225 // ASSERT: graph is known to be DAG, hence no reflexive edge
226 let [source, target] = classes
227 .get_disjoint_mut([node.index(), target.index()])
228 .unwrap();
229 target.extend_from_slice(source);
230 }
231 }
232 } else {
233 for node in nodes.iter().rev() {
234 classes[node.index()].push(node.index());
235 for target in graph.neighbors(*node) {
236 // ASSERT: graph is known to be DAG, hence no reflexive edge
237 let [source, target] = classes
238 .get_disjoint_mut([node.index(), target.index()])
239 .unwrap();
240 source.extend_from_slice(target);
241 }
242 }
243 }
244
245 classes
246 }
247
248 /// Get an iterator over the node indices of the graph in depth-first search order from the root.
249 pub fn dfs(&self) -> impl Iterator<Item = usize> {
250 Dfs::new(&self.inner, self.root)
251 .iter(&self.inner)
252 .map(|i| i.index())
253 }
254
255 /// Get an iterator over the node indices of the graph in breadth-first search order from the root.
256 pub fn bfs(&self) -> impl Iterator<Item = usize> {
257 Bfs::new(&self.inner, self.root)
258 .iter(&self.inner)
259 .map(|i| i.index())
260 }
261
262 /// Get an iterator over the node indices of the graph in topological order.
263 pub fn topo(&self) -> impl Iterator<Item = usize> {
264 Topo::new(&self.inner).iter(&self.inner).map(|i| i.index())
265 }
266
267 /// Get an iterator over the neighboring node indices of the given node index.
268 ///
269 /// If `outgoing` is `true`, get the outgoing neighbors (i.e., dependencies),
270 /// otherwise get the incoming neighbors (i.e., dependents).
271 pub fn neighbors(&self, index: usize, outgoing: bool) -> impl Iterator<Item = usize> {
272 let index = NodeIndex::new(index);
273 let direction = if outgoing {
274 petgraph::Direction::Outgoing
275 } else {
276 petgraph::Direction::Incoming
277 };
278 self.inner
279 .neighbors_directed(index, direction)
280 .map(|i| i.index())
281 }
282
283 /// Remove all nodes that are deeper than `max_depth` from the root, and any nodes that
284 /// are subsequently not reachable from the root.
285 pub fn remove_deep_deps(&mut self, max_depth: usize) {
286 let inner = &mut self.inner;
287
288 // TODO: use petgraph#868 once merged
289 let mut queue = VecDeque::from([(self.root, 0)]);
290 let mut has_visited = vec![false; inner.capacity().0];
291 has_visited[self.root.index()] = true;
292
293 while let Some((node, depth)) = queue.pop_front()
294 && depth < max_depth
295 {
296 for target in inner.neighbors(node) {
297 if !has_visited[target.index()] {
298 queue.push_back((target, depth + 1));
299 has_visited[target.index()] = true;
300 }
301 }
302 }
303
304 remove_not_visited(inner, &has_visited, self.std);
305 }
306
307 fn remove_unreachable(&mut self) {
308 let inner = &self.inner;
309 let mut has_visited = vec![false; inner.capacity().0];
310 for node_index in Dfs::new(inner, self.root).iter(inner) {
311 has_visited[node_index.index()] = true;
312 }
313
314 remove_not_visited(&mut self.inner, &has_visited, self.std);
315 }
316
317 /// Remove the nodes at the given indices, and any nodes that are subsequently not reachable
318 /// from the root.
319 pub fn remove_indices(&mut self, indices: impl Iterator<Item = usize>) {
320 let inner = &mut self.inner;
321
322 for index in indices {
323 inner.remove_node(NodeIndex::new(index));
324 }
325
326 self.remove_unreachable();
327 }
328
329 /// Change the root node to the given index, and remove any nodes that are not reachable from
330 /// the new root.
331 pub fn change_root(&mut self, new_root_index: usize) {
332 let index = NodeIndex::new(new_root_index);
333 assert!(self.inner.contains_node(index));
334 self.root = index;
335 self.remove_unreachable();
336 }
337
338 /// Output the graph in DOT format with the given options, templating, coloring values, and
339 /// gradient.
340 ///
341 /// The `values` parameter should have been created from this graph, possibly before any
342 /// node removals.
343 ///
344 /// ```
345 /// # use pugio_lib::graph::Graph;
346 /// use pugio_lib::template::{Template, Templating};
347 /// use pugio_lib::coloring::{Gradient, Values, NodeColoringScheme, NodeColoringGradient, NodeColoringValues};
348 ///
349 /// fn output(graph: &Graph) -> String {
350 /// let template_options = Default::default();
351 /// let template = Template::new(&template_options).unwrap();
352 /// let values = Some(NodeColoringValues::new(graph, NodeColoringScheme::CumSum));
353 /// let gradient = NodeColoringGradient::Viridis;
354 ///
355 /// graph.output_dot(&Default::default(), &template, &values, &gradient)
356 /// }
357 ///
358 /// ```
359 pub fn output_dot<C, V, T, R, S, G>(
360 &self,
361 dot_options: &DotOptions,
362 templating: &R,
363 values: &S,
364 gradient: &G,
365 ) -> String
366 where
367 R: Templating<Context = C, Value = V>,
368 S: Values<Context = C, Value = V, Output = T>,
369 G: Gradient<Input = T>,
370 {
371 let classes = dot_options
372 .highlight
373 .map(|is_dir_down| self.node_classes(is_dir_down));
374
375 let node_binding = |_, (i, _): (NodeIndex, _)| {
376 let index = i.index();
377 let size = self.size(index).unwrap_or_default();
378 let width = (size as f32 / 4096.0 + 1.0).log10();
379
380 let context = values.context();
381 let value = values.value(index);
382 let output = values.output(index);
383 let color = gradient.color(output, dot_options.dark_mode, dot_options.inverse_gradient);
384 let color = format!("#{color:X}");
385
386 let (label, tooltip) = templating.node(self, index, value, context);
387
388 let classes = if let Some(classes) = &classes {
389 &classes[index]
390 .iter()
391 .map(|i| format!("node{i}"))
392 .collect::<Vec<_>>()
393 .join(" ")
394 } else {
395 ""
396 };
397
398 format!(
399 r#"class = "{classes}" label = "{label}" tooltip = "{tooltip}" width = {width} fillcolor= "{color}""#,
400 )
401 };
402
403 let edge_binding = |_, e: EdgeReference<'_, EdgeWeight>| {
404 let (label, tooltip) = templating.edge(self, e.source().index(), e.target().index());
405
406 let classes = if let Some(classes) = &classes {
407 let i = if dot_options.highlight.unwrap() {
408 e.source()
409 } else {
410 e.target()
411 };
412 &classes[i.index()]
413 .iter()
414 .map(|i| format!("node{i}"))
415 .collect::<Vec<_>>()
416 .join(" ")
417 } else {
418 ""
419 };
420
421 format!(
422 r#"class = "{classes}" label = "{label}" edgetooltip = "{tooltip}" labeltooltip = "{tooltip}""#
423 )
424 };
425
426 let dot = Dot::with_attr_getters(
427 &self.inner,
428 &[Config::EdgeNoLabel, Config::NodeNoLabel],
429 &edge_binding,
430 &node_binding,
431 );
432
433 format!("{dot:?}")
434 }
435}
436
437fn remove_not_visited(
438 graph: &mut StableGraph<NodeWeight, EdgeWeight>,
439 has_visited: &[bool],
440 std_index: Option<NodeIndex>,
441) {
442 for index in has_visited.iter().enumerate().filter_map(|(i, b)| {
443 let index = NodeIndex::new(i);
444 if !b && Some(index) != std_index {
445 Some(index)
446 } else {
447 None
448 }
449 }) {
450 graph.remove_node(index);
451 }
452}
453
454/// The weight of a node in the dependency graph, representing a crate.
455///
456/// The crate name already has the hyphen `-` replaced with `_` as used in code.
457#[derive(Clone)]
458pub struct NodeWeight {
459 name: String,
460 short_end: usize,
461 pub(crate) features: BTreeMap<String, Vec<String>>,
462}
463
464impl std::fmt::Debug for NodeWeight {
465 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
466 f.debug_struct("NodeWeight")
467 .field("name", &self.name)
468 .field("features", &self.features)
469 .finish()
470 }
471}
472
473impl NodeWeight {
474 pub(crate) fn new(
475 name: String,
476 short_end: usize,
477 features: BTreeMap<String, Vec<String>>,
478 ) -> Self {
479 Self {
480 name,
481 short_end,
482 features,
483 }
484 }
485
486 /// Short name of the crate.
487 ///
488 /// For example, if the full name is `pugio_lib v1.0.0`, this returns `pugio_lib`.
489 pub fn short(&self) -> &str {
490 &self.name[..self.short_end]
491 }
492
493 /// Extra information of the crate, including version and potentially path.
494 ///
495 /// For example, if the full name is `pugio_lib v1.0.0`, this returns `v1.0.0`.
496 pub fn extra(&self) -> &str {
497 &self.name[self.short_end + 1..]
498 }
499
500 /// Full name of the crate.
501 ///
502 /// For example, `pugio_lib v1.0.0`.
503 pub fn full(&self) -> &str {
504 &self.name
505 }
506
507 /// Get the enabled features of the crate.
508 ///
509 /// This returns a map from a feature to features that it directly enable.
510 ///
511 /// For example, if a crate has features:
512 /// - `default = ["a", "b"]`
513 /// - `a = ["c"]`
514 /// - `b = []`
515 /// - `c = []`
516 ///
517 /// This returns a map: `{("default": ["a", "b"]), ("a": ["c"]), ("b": []), ("c": [])}`
518 ///
519 /// This does not include enabled optional dependencies, e.g. `feature = ["dep:crate"]`.
520 /// Dependency features, e.g. `feature = ["crate/feature"]` are represented in [`EdgeWeight`]
521 /// instead.
522 // TODO: report `cargo tree` unable to output feature = ["crate/feature"], which should result
523 // in pugio-lib feature "feature"
524 // |- pugio-lib ...
525 // |- crate feature "feature"
526 // |- crate ...
527 pub fn features(&self) -> &BTreeMap<String, Vec<String>> {
528 &self.features
529 }
530}
531
532/// The weight of a directed edge in the dependency graph, representing a binary relation of
533/// dependency of the source node on the target node.
534#[derive(Debug, Clone)]
535pub struct EdgeWeight {
536 pub(crate) features: BTreeMap<String, Vec<String>>,
537}
538
539impl EdgeWeight {
540 /// Get the features that are enabled by the dependency.
541 ///
542 /// This returns a map from a feature of the source crate to features of the target crate that
543 /// it enables.
544 ///
545 /// For example, if the dependent crate has features `a = ["crate/b", "crate/c"]`, enabling
546 /// features "b" and "c" of the dependency "crate", this returns a map: `{("a": ["b", "c"]}`.
547 pub fn features(&self) -> &BTreeMap<String, Vec<String>> {
548 &self.features
549 }
550}
551
552/// Options for outputting the graph in DOT format.
553#[derive(Debug, Default)]
554pub struct DotOptions {
555 /// If `Some(true)`, highlight nodes in downward direction (dependencies) from the root.
556 ///
557 /// If `Some(false)`, highlight nodes in upward direction (reverse dependencies) to the root.
558 ///
559 /// If `None`, do not highlight any nodes.
560 pub highlight: Option<bool>,
561 /// Name of the binary, if different from the crate name.
562 pub bin: Option<String>,
563 /// If `true`, invert the gradient for coloring.
564 pub inverse_gradient: bool,
565 /// If `true`, use dark mode for coloring.
566 pub dark_mode: bool,
567}