Skip to main content

tldr_cli/commands/remaining/difftastic/
dijkstra.rs

1//! Implements Dijkstra's algorithm for shortest path, to find an
2//! optimal and readable diff between two ASTs.
3//!
4//! Vendored from difftastic with modifications:
5//! - Import paths rewritten from crate:: to super::
6//! - All logging removed (no log/humansize dependency)
7//! - Tests stripped
8//! - DEFAULT_GRAPH_LIMIT inlined
9
10use std::cmp::Reverse;
11
12use bumpalo::Bump;
13use radix_heap::RadixHeapMap;
14
15use super::{
16    changes::ChangeMap,
17    graph::{populate_change_map, set_neighbours, Edge, Vertex},
18    hash::DftHashMap,
19    syntax::Syntax,
20};
21
22/// The default limit on the number of graph nodes before bailing out.
23/// Inlined from crate::options::DEFAULT_GRAPH_LIMIT.
24pub const DEFAULT_GRAPH_LIMIT: usize = 3_000_000;
25
26#[derive(Debug)]
27pub struct ExceededGraphLimit {}
28
29/// Return the shortest route from `start` to the end vertex.
30fn shortest_vertex_path<'s, 'v>(
31    start: &'v Vertex<'s, 'v>,
32    vertex_arena: &'v Bump,
33    size_hint: usize,
34    graph_limit: usize,
35) -> Result<Vec<&'v Vertex<'s, 'v>>, ExceededGraphLimit> {
36    // We want to visit nodes with the shortest distance first, but
37    // RadixHeapMap is a max-heap. Ensure nodes are wrapped with
38    // Reverse to flip comparisons.
39    let mut heap: RadixHeapMap<Reverse<_>, &'v Vertex<'s, 'v>> = RadixHeapMap::new();
40
41    heap.push(Reverse(0), start);
42
43    let mut seen = DftHashMap::default();
44    seen.reserve(size_hint);
45
46    let end: &'v Vertex<'s, 'v> = loop {
47        match heap.pop() {
48            Some((Reverse(distance), current)) => {
49                if current.is_end() {
50                    break current;
51                }
52
53                set_neighbours(current, vertex_arena, &mut seen);
54                for neighbour in *current.neighbours.borrow().as_ref().unwrap() {
55                    let (edge, next) = neighbour;
56                    let distance_to_next = distance + edge.cost();
57
58                    let found_shorter_route = match next.predecessor.get() {
59                        Some((prev_shortest, _)) => distance_to_next < prev_shortest,
60                        None => true,
61                    };
62
63                    if found_shorter_route {
64                        next.predecessor.replace(Some((distance_to_next, current)));
65                        heap.push(Reverse(distance_to_next), next);
66                    }
67                }
68
69                if seen.len() > graph_limit {
70                    return Err(ExceededGraphLimit {});
71                }
72            }
73            None => panic!("Ran out of graph nodes before reaching end"),
74        }
75    };
76
77    let mut current = Some((0, end));
78    let mut vertex_route: Vec<&'v Vertex<'s, 'v>> = vec![];
79    while let Some((_, node)) = current {
80        vertex_route.push(node);
81        current = node.predecessor.get();
82    }
83
84    vertex_route.reverse();
85    Ok(vertex_route)
86}
87
88fn shortest_path_with_edges<'s, 'v>(
89    route: &[&'v Vertex<'s, 'v>],
90) -> Vec<(Edge, &'v Vertex<'s, 'v>)> {
91    let mut prev = route.first().expect("Expected non-empty route");
92
93    let mut res = vec![];
94
95    for vertex in route.iter().skip(1) {
96        let edge = edge_between(prev, vertex);
97        res.push((edge, *prev));
98
99        prev = vertex;
100    }
101
102    res
103}
104
105/// Return the shortest route from the `start` to the end vertex.
106///
107/// The vec returned does not return the very last vertex. This is
108/// necessary because a route of N vertices only has N-1 edges.
109fn shortest_path<'s, 'v>(
110    start: Vertex<'s, 'v>,
111    vertex_arena: &'v Bump,
112    size_hint: usize,
113    graph_limit: usize,
114) -> Result<Vec<(Edge, &'v Vertex<'s, 'v>)>, ExceededGraphLimit> {
115    let start: &'v Vertex<'s, 'v> = vertex_arena.alloc(start);
116    let vertex_path = shortest_vertex_path(start, vertex_arena, size_hint, graph_limit)?;
117    Ok(shortest_path_with_edges(&vertex_path))
118}
119
120fn edge_between<'s, 'v>(before: &Vertex<'s, 'v>, after: &Vertex<'s, 'v>) -> Edge {
121    assert_ne!(before, after);
122
123    let mut shortest_edge: Option<Edge> = None;
124    if let Some(neighbours) = &*before.neighbours.borrow() {
125        for neighbour in *neighbours {
126            let (edge, next) = *neighbour;
127            // If there are multiple edges that can take us to `next`,
128            // prefer the shortest.
129            if *next == *after {
130                let is_shorter = match shortest_edge {
131                    Some(prev_edge) => edge.cost() < prev_edge.cost(),
132                    None => true,
133                };
134
135                if is_shorter {
136                    shortest_edge = Some(edge);
137                }
138            }
139        }
140    }
141
142    if let Some(edge) = shortest_edge {
143        return edge;
144    }
145
146    panic!(
147        "Expected a route between the two vertices {:#?} and {:#?}",
148        before, after
149    );
150}
151
152/// What is the total number of AST nodes?
153fn node_count(root: Option<&Syntax>) -> u32 {
154    let iter = std::iter::successors(root, |node| node.next_sibling());
155
156    iter.map(|node| match node {
157        Syntax::List {
158            num_descendants, ..
159        } => *num_descendants,
160        Syntax::Atom { .. } => 1,
161    })
162    .sum::<u32>()
163}
164
165pub fn mark_syntax<'a>(
166    lhs_syntax: Option<&'a Syntax<'a>>,
167    rhs_syntax: Option<&'a Syntax<'a>>,
168    change_map: &mut ChangeMap<'a>,
169    graph_limit: usize,
170) -> Result<(), ExceededGraphLimit> {
171    let lhs_node_count = node_count(lhs_syntax) as usize;
172    let rhs_node_count = node_count(rhs_syntax) as usize;
173
174    // When there are a large number of changes, we end up building a
175    // graph whose size is roughly quadratic. Use this as a size hint,
176    // so we don't spend too much time re-hashing and expanding the
177    // predecessors hashmap.
178    //
179    // Cap this number to the graph limit, so we don't try to allocate
180    // an absurdly large (i.e. greater than physical memory) hashmap
181    // when there is a large number of nodes. We'll never visit more
182    // than graph_limit nodes.
183    let size_hint = std::cmp::min(lhs_node_count * rhs_node_count, graph_limit);
184
185    let start = Vertex::new(lhs_syntax, rhs_syntax);
186    let vertex_arena = Bump::new();
187
188    let route = shortest_path(start, &vertex_arena, size_hint, graph_limit)?;
189
190    populate_change_map(&route, change_map);
191    Ok(())
192}