Skip to main content

tldr_cli/commands/remaining/difftastic/
graph.rs

1//! A graph representation for computing tree diffs.
2//!
3//! Vendored from difftastic with modifications:
4//! - Import paths rewritten from crate:: to super::
5//! - Tests stripped
6//! - Visibility changed to pub for cross-module use
7
8use std::{
9    cell::{Cell, RefCell},
10    cmp::min,
11    fmt,
12    hash::{Hash, Hasher},
13};
14
15use bumpalo::Bump;
16use hashbrown::hash_map::RawEntryMut;
17use smallvec::{smallvec, SmallVec};
18use strsim::normalized_levenshtein;
19
20use self::Edge::*;
21use super::{
22    changes::{insert_deep_unchanged, ChangeKind, ChangeMap},
23    hash::DftHashMap,
24    stack::Stack,
25    syntax::{AtomKind, Syntax, SyntaxId},
26};
27
28/// A vertex in a directed acyclic graph that represents a diff.
29///
30/// Each vertex represents two pointers: one to the next unmatched LHS
31/// syntax, and one to the next unmatched RHS syntax.
32///
33/// For example, suppose we have `X A` on the LHS and `A` on the
34/// RHS. Our start vertex looks like this.
35///
36/// ```text
37/// LHS: X A     RHS: A
38///      ^            ^
39/// ```
40///
41/// From this vertex, we could take [`Edge::NovelAtomLHS`], bringing
42/// us to this vertex.
43///
44/// ```text
45/// LHS: X A     RHS: A
46///        ^          ^
47/// ```
48///
49/// Alternatively, we could take the [`Edge::NovelAtomRHS`], bringing us
50/// to this vertex.
51///
52/// ```text
53/// LHS: X A     RHS: A
54///      ^              ^
55/// ```
56///
57/// Vertices are arena allocated (the 'v lifetime) and have references
58/// to syntax nodes (the 's lifetime).
59#[derive(Debug, Clone)]
60pub struct Vertex<'s, 'v> {
61    pub neighbours: RefCell<Option<&'v [(Edge, &'v Vertex<'s, 'v>)]>>,
62    pub predecessor: Cell<Option<(u32, &'v Vertex<'s, 'v>)>>,
63    // TODO: experiment with storing SyntaxId only, and have a HashMap
64    // from SyntaxId to &Syntax.
65    pub lhs_syntax: Option<&'s Syntax<'s>>,
66    pub rhs_syntax: Option<&'s Syntax<'s>>,
67    parents: Stack<'v, EnteredDelimiter<'s, 'v>>,
68    lhs_parent_id: Option<SyntaxId>,
69    rhs_parent_id: Option<SyntaxId>,
70}
71
72impl PartialEq for Vertex<'_, '_> {
73    fn eq(&self, other: &Self) -> bool {
74        // Strictly speaking, we should compare the whole
75        // EnteredDelimiter stack, not just the immediate
76        // parents. By taking the immediate parent, we have
77        // vertices with different stacks that are 'equal'.
78        //
79        // This makes the graph traversal path dependent: the
80        // first vertex we see 'wins', and we use it for deciding
81        // how we can pop.
82        //
83        // In practice this seems to work well. The first vertex
84        // has the lowest cost, so has the most PopBoth
85        // occurrences, which is the best outcome.
86        //
87        // Handling this properly would require considering many
88        // more vertices to be distinct, exponentially increasing
89        // the graph size relative to tree depth.
90        let b0 = match (self.lhs_syntax, other.lhs_syntax) {
91            (Some(s0), Some(s1)) => s0.id() == s1.id(),
92            (None, None) => self.lhs_parent_id == other.lhs_parent_id,
93            _ => false,
94        };
95        let b1 = match (self.rhs_syntax, other.rhs_syntax) {
96            (Some(s0), Some(s1)) => s0.id() == s1.id(),
97            (None, None) => self.rhs_parent_id == other.rhs_parent_id,
98            _ => false,
99        };
100        // We do want to distinguish whether we can pop each side
101        // independently though. Without this, if we find a case
102        // where we can pop sides together, we don't consider the
103        // case where we get a better diff by popping each side
104        // separately.
105        let b2 = can_pop_either_parent(&self.parents) == can_pop_either_parent(&other.parents);
106
107        b0 && b1 && b2
108    }
109}
110impl Eq for Vertex<'_, '_> {}
111
112impl Hash for Vertex<'_, '_> {
113    fn hash<H: Hasher>(&self, state: &mut H) {
114        self.lhs_syntax.map(|node| node.id()).hash(state);
115        self.rhs_syntax.map(|node| node.id()).hash(state);
116
117        self.lhs_parent_id.hash(state);
118        self.rhs_parent_id.hash(state);
119        can_pop_either_parent(&self.parents).hash(state);
120    }
121}
122
123/// Tracks entering syntax List nodes.
124#[derive(Clone, PartialEq)]
125enum EnteredDelimiter<'s, 'v> {
126    /// If we've entered the LHS or RHS separately, we can pop either
127    /// side independently.
128    ///
129    /// Assumes that at least one stack is non-empty.
130    PopEither((Stack<'v, &'s Syntax<'s>>, Stack<'v, &'s Syntax<'s>>)),
131    /// If we've entered the LHS and RHS together, we must pop both
132    /// sides together too. Otherwise we'd consider the following case to have no changes.
133    ///
134    /// ```text
135    /// Old: (a b c)
136    /// New: (a b) c
137    /// ```
138    PopBoth((&'s Syntax<'s>, &'s Syntax<'s>)),
139}
140
141impl fmt::Debug for EnteredDelimiter<'_, '_> {
142    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
143        let desc = match self {
144            Self::PopEither((lhs_delims, rhs_delims)) => {
145                format!(
146                    "PopEither(lhs count: {}, rhs count: {})",
147                    lhs_delims.size(),
148                    rhs_delims.size()
149                )
150            }
151            Self::PopBoth(_) => "PopBoth".to_owned(),
152        };
153        f.write_str(&desc)
154    }
155}
156
157fn push_both_delimiters<'s, 'v>(
158    entered: &Stack<'v, EnteredDelimiter<'s, 'v>>,
159    lhs_delim: &'s Syntax<'s>,
160    rhs_delim: &'s Syntax<'s>,
161    alloc: &'v Bump,
162) -> Stack<'v, EnteredDelimiter<'s, 'v>> {
163    entered.push(EnteredDelimiter::PopBoth((lhs_delim, rhs_delim)), alloc)
164}
165
166fn can_pop_either_parent(entered: &Stack<EnteredDelimiter>) -> bool {
167    matches!(entered.peek(), Some(EnteredDelimiter::PopEither(_)))
168}
169
170fn try_pop_both<'s, 'v>(
171    entered: &Stack<'v, EnteredDelimiter<'s, 'v>>,
172) -> Option<(
173    &'s Syntax<'s>,
174    &'s Syntax<'s>,
175    Stack<'v, EnteredDelimiter<'s, 'v>>,
176)> {
177    match entered.peek() {
178        Some(EnteredDelimiter::PopBoth((lhs_delim, rhs_delim))) => {
179            Some((lhs_delim, rhs_delim, entered.pop().unwrap()))
180        }
181        _ => None,
182    }
183}
184
185fn try_pop_lhs<'s, 'v>(
186    entered: &Stack<'v, EnteredDelimiter<'s, 'v>>,
187    alloc: &'v Bump,
188) -> Option<(&'s Syntax<'s>, Stack<'v, EnteredDelimiter<'s, 'v>>)> {
189    match entered.peek() {
190        Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => match lhs_delims.peek() {
191            Some(lhs_delim) => {
192                let mut entered = entered.pop().unwrap();
193                let new_lhs_delims = lhs_delims.pop().unwrap();
194
195                if !new_lhs_delims.is_empty() || !rhs_delims.is_empty() {
196                    entered = entered.push(
197                        EnteredDelimiter::PopEither((new_lhs_delims, rhs_delims.clone())),
198                        alloc,
199                    );
200                }
201
202                Some((lhs_delim, entered))
203            }
204            None => None,
205        },
206        _ => None,
207    }
208}
209
210fn try_pop_rhs<'s, 'v>(
211    entered: &Stack<'v, EnteredDelimiter<'s, 'v>>,
212    alloc: &'v Bump,
213) -> Option<(&'s Syntax<'s>, Stack<'v, EnteredDelimiter<'s, 'v>>)> {
214    match entered.peek() {
215        Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => match rhs_delims.peek() {
216            Some(rhs_delim) => {
217                let mut entered = entered.pop().unwrap();
218                let new_rhs_delims = rhs_delims.pop().unwrap();
219
220                if !lhs_delims.is_empty() || !new_rhs_delims.is_empty() {
221                    entered = entered.push(
222                        EnteredDelimiter::PopEither((lhs_delims.clone(), new_rhs_delims)),
223                        alloc,
224                    );
225                }
226
227                Some((rhs_delim, entered))
228            }
229            None => None,
230        },
231        _ => None,
232    }
233}
234
235fn push_lhs_delimiter<'s, 'v>(
236    entered: &Stack<'v, EnteredDelimiter<'s, 'v>>,
237    delimiter: &'s Syntax<'s>,
238    alloc: &'v Bump,
239) -> Stack<'v, EnteredDelimiter<'s, 'v>> {
240    match entered.peek() {
241        Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push(
242            EnteredDelimiter::PopEither((lhs_delims.push(delimiter, alloc), rhs_delims.clone())),
243            alloc,
244        ),
245        _ => entered.push(
246            EnteredDelimiter::PopEither((Stack::new().push(delimiter, alloc), Stack::new())),
247            alloc,
248        ),
249    }
250}
251
252fn push_rhs_delimiter<'s, 'v>(
253    entered: &Stack<'v, EnteredDelimiter<'s, 'v>>,
254    delimiter: &'s Syntax<'s>,
255    alloc: &'v Bump,
256) -> Stack<'v, EnteredDelimiter<'s, 'v>> {
257    match entered.peek() {
258        Some(EnteredDelimiter::PopEither((lhs_delims, rhs_delims))) => entered.pop().unwrap().push(
259            EnteredDelimiter::PopEither((lhs_delims.clone(), rhs_delims.push(delimiter, alloc))),
260            alloc,
261        ),
262        _ => entered.push(
263            EnteredDelimiter::PopEither((Stack::new(), Stack::new().push(delimiter, alloc))),
264            alloc,
265        ),
266    }
267}
268
269impl<'s, 'v> Vertex<'s, 'v> {
270    pub fn is_end(&self) -> bool {
271        self.lhs_syntax.is_none() && self.rhs_syntax.is_none() && self.parents.is_empty()
272    }
273
274    pub fn new(lhs_syntax: Option<&'s Syntax<'s>>, rhs_syntax: Option<&'s Syntax<'s>>) -> Self {
275        let parents = Stack::new();
276        Vertex {
277            neighbours: RefCell::new(None),
278            predecessor: Cell::new(None),
279            lhs_syntax,
280            rhs_syntax,
281            parents,
282            lhs_parent_id: None,
283            rhs_parent_id: None,
284        }
285    }
286}
287
288/// An edge in our graph, with an associated [`cost`](Edge::cost).
289///
290/// A syntax node can always be marked as novel, so a vertex will have
291/// at least a NovelFoo edge. Depending on the syntax nodes of the
292/// current [`Vertex`], other edges may also be available.
293///
294/// See [`set_neighbours`] for all the edges available for a given `Vertex`.
295#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
296pub enum Edge {
297    UnchangedNode {
298        depth_difference: u32,
299        /// Is this node just punctuation? We penalise this case,
300        /// because it's more useful to match e.g. a variable name
301        /// than a comma.
302        probably_punctuation: bool,
303    },
304    EnterUnchangedDelimiter {
305        depth_difference: u32,
306    },
307    ReplacedComment {
308        levenshtein_pct: u8,
309    },
310    ReplacedString {
311        levenshtein_pct: u8,
312    },
313    NovelAtomLHS {},
314    NovelAtomRHS {},
315    // TODO: An EnterNovelDelimiterBoth edge might help performance
316    // rather doing LHS and RHS separately.
317    EnterNovelDelimiterLHS {},
318    EnterNovelDelimiterRHS {},
319}
320
321impl Edge {
322    pub fn cost(self) -> u32 {
323        match self {
324            // Matching nodes is always best.
325            UnchangedNode {
326                depth_difference,
327                probably_punctuation,
328            } => {
329                // TODO: Perhaps prefer matching longer strings? It's
330                // probably easier to read.
331
332                // The cost for unchanged nodes can be as low as 1,
333                // but we penalise nodes that have a different depth
334                // difference, capped at 40.
335                let base = min(40, depth_difference + 1);
336
337                // If the node is only punctuation, increase the
338                // cost. It's better to have unchanged variable names
339                // and novel punctuation than the reverse.
340                //
341                // We want a sufficiently large punctuation cost such
342                // that unchanged variables always win, even if there
343                // are replacement edges elsewhere.
344                //
345                // Replacement edges have a cost between 500 and 600,
346                // so they can be up to 100 less than two novel nodes.
347                // If we have replacements either side of a node
348                // (e.g. see comma_and_comment_1.js), then that's
349                // potentially a cost difference of 200.
350                base + if probably_punctuation { 200 } else { 0 }
351            }
352            // Matching an outer delimiter is good.
353            EnterUnchangedDelimiter { depth_difference } => 100 + min(40, depth_difference),
354
355            // Otherwise, we've added/removed a node.
356            NovelAtomLHS {} | NovelAtomRHS {} => 300,
357            EnterNovelDelimiterLHS { .. } | EnterNovelDelimiterRHS { .. } => 300,
358            // Replacing a comment is better than treating it as
359            // novel. However, since ReplacedComment is an alternative
360            // to NovelAtomLHS and NovelAtomRHS, we need to be
361            // slightly less than 2 * 300.
362            ReplacedComment { levenshtein_pct } | ReplacedString { levenshtein_pct } => {
363                500 + u32::from(100 - levenshtein_pct)
364            }
365        }
366    }
367}
368
369fn allocate_if_new<'s, 'v>(
370    v: Vertex<'s, 'v>,
371    alloc: &'v Bump,
372    seen: &mut DftHashMap<&Vertex<'s, 'v>, SmallVec<[&'v Vertex<'s, 'v>; 2]>>,
373) -> &'v Vertex<'s, 'v> {
374    // We use the entry API so that we only need to do a single lookup
375    // for access and insert.
376    match seen.raw_entry_mut().from_key(&v) {
377        RawEntryMut::Occupied(mut occupied) => {
378            let existing = occupied.get_mut();
379
380            // Don't explore more than two possible parenthesis
381            // nestings for each syntax node pair.
382            if let Some(allocated) = existing.last() {
383                if existing.len() >= 2 {
384                    return allocated;
385                }
386            }
387
388            // If we have seen exactly this graph node before, even
389            // considering parenthesis matching, return it.
390            for existing_node in existing.iter() {
391                if existing_node.parents == v.parents {
392                    return existing_node;
393                }
394            }
395
396            // We haven't reached the graph node limit yet, allocate a
397            // new one.
398            let allocated = alloc.alloc(v);
399            existing.push(allocated);
400            allocated
401        }
402        RawEntryMut::Vacant(vacant) => {
403            let allocated = alloc.alloc(v);
404
405            // We know that this vec will never have more than 2
406            // nodes, and this code is very hot, so use a smallvec.
407            //
408            // We still use a vec to enable experiments with the value
409            // of how many possible parenthesis nestings to explore.
410            let existing: SmallVec<[&'v Vertex<'s, 'v>; 2]> = smallvec![&*allocated];
411
412            vacant.insert(allocated, existing);
413            allocated
414        }
415    }
416}
417
418/// Does this node look like punctuation?
419///
420/// This check is deliberately conservative, because it's hard to
421/// accurately recognise punctuation in a language-agnostic way.
422fn looks_like_punctuation(node: &Syntax) -> bool {
423    match node {
424        Syntax::Atom { content, .. } => content == "," || content == ";" || content == ".",
425        _ => false,
426    }
427}
428
429/// Pop as many parents of `lhs_node` and `rhs_node` as
430/// possible. Return the new syntax nodes and parents.
431type PoppedParents<'s, 'v> = (
432    Option<&'s Syntax<'s>>,
433    Option<&'s Syntax<'s>>,
434    Option<SyntaxId>,
435    Option<SyntaxId>,
436    Stack<'v, EnteredDelimiter<'s, 'v>>,
437);
438
439fn pop_all_parents<'s, 'v>(
440    lhs_node: Option<&'s Syntax<'s>>,
441    rhs_node: Option<&'s Syntax<'s>>,
442    lhs_parent_id: Option<SyntaxId>,
443    rhs_parent_id: Option<SyntaxId>,
444    parents: &Stack<'v, EnteredDelimiter<'s, 'v>>,
445    alloc: &'v Bump,
446) -> PoppedParents<'s, 'v> {
447    let mut lhs_node = lhs_node;
448    let mut rhs_node = rhs_node;
449    let mut lhs_parent_id = lhs_parent_id;
450    let mut rhs_parent_id = rhs_parent_id;
451    let mut parents = parents.clone();
452
453    loop {
454        if lhs_node.is_none() {
455            if let Some((lhs_parent, parents_next)) = try_pop_lhs(&parents, alloc) {
456                // Move to next after LHS parent.
457
458                // Continue from sibling of parent.
459                lhs_node = lhs_parent.next_sibling();
460                lhs_parent_id = lhs_parent.parent().map(Syntax::id);
461                parents = parents_next;
462                continue;
463            }
464        }
465
466        if rhs_node.is_none() {
467            if let Some((rhs_parent, parents_next)) = try_pop_rhs(&parents, alloc) {
468                // Move to next after RHS parent.
469
470                // Continue from sibling of parent.
471                rhs_node = rhs_parent.next_sibling();
472                rhs_parent_id = rhs_parent.parent().map(Syntax::id);
473                parents = parents_next;
474                continue;
475            }
476        }
477
478        if lhs_node.is_none() && rhs_node.is_none() {
479            // We have exhausted all the nodes on both lists, so we can
480            // move up to the parent node.
481
482            // Continue from sibling of parent.
483            if let Some((lhs_parent, rhs_parent, parents_next)) = try_pop_both(&parents) {
484                lhs_node = lhs_parent.next_sibling();
485                rhs_node = rhs_parent.next_sibling();
486                lhs_parent_id = lhs_parent.parent().map(Syntax::id);
487                rhs_parent_id = rhs_parent.parent().map(Syntax::id);
488                parents = parents_next;
489                continue;
490            }
491        }
492
493        break;
494    }
495
496    (lhs_node, rhs_node, lhs_parent_id, rhs_parent_id, parents)
497}
498
499/// Compute the neighbours of `v` if we haven't previously done so,
500/// and write them to the .neighbours cell inside `v`.
501pub fn set_neighbours<'s, 'v>(
502    v: &Vertex<'s, 'v>,
503    alloc: &'v Bump,
504    seen: &mut DftHashMap<&Vertex<'s, 'v>, SmallVec<[&'v Vertex<'s, 'v>; 2]>>,
505) {
506    if v.neighbours.borrow().is_some() {
507        return;
508    }
509
510    // There are only seven pushes in this function, so that's sufficient.
511    let mut neighbours: Vec<(Edge, &Vertex)> = Vec::with_capacity(7);
512
513    if let (Some(lhs_syntax), Some(rhs_syntax)) = (&v.lhs_syntax, &v.rhs_syntax) {
514        if lhs_syntax == rhs_syntax {
515            let depth_difference = (lhs_syntax.num_ancestors() as i32
516                - rhs_syntax.num_ancestors() as i32)
517                .unsigned_abs();
518
519            let probably_punctuation = looks_like_punctuation(lhs_syntax);
520
521            // Both nodes are equal, the happy case.
522            let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) = pop_all_parents(
523                lhs_syntax.next_sibling(),
524                rhs_syntax.next_sibling(),
525                v.lhs_parent_id,
526                v.rhs_parent_id,
527                &v.parents,
528                alloc,
529            );
530
531            neighbours.push((
532                UnchangedNode {
533                    depth_difference,
534                    probably_punctuation,
535                },
536                allocate_if_new(
537                    Vertex {
538                        neighbours: RefCell::new(None),
539                        predecessor: Cell::new(None),
540                        lhs_syntax,
541                        rhs_syntax,
542                        parents,
543                        lhs_parent_id,
544                        rhs_parent_id,
545                    },
546                    alloc,
547                    seen,
548                ),
549            ));
550        }
551
552        if let (
553            Syntax::List {
554                open_content: lhs_open_content,
555                close_content: lhs_close_content,
556                children: lhs_children,
557                ..
558            },
559            Syntax::List {
560                open_content: rhs_open_content,
561                close_content: rhs_close_content,
562                children: rhs_children,
563                ..
564            },
565        ) = (lhs_syntax, rhs_syntax)
566        {
567            // The list delimiters are equal, but children may not be.
568            if lhs_open_content == rhs_open_content && lhs_close_content == rhs_close_content {
569                let lhs_next = lhs_children.first().copied();
570                let rhs_next = rhs_children.first().copied();
571
572                // TODO: be consistent between parents_next and next_parents.
573                let parents_next = push_both_delimiters(&v.parents, lhs_syntax, rhs_syntax, alloc);
574
575                let depth_difference = (lhs_syntax.num_ancestors() as i32
576                    - rhs_syntax.num_ancestors() as i32)
577                    .unsigned_abs();
578
579                // When we enter a list, we may need to immediately
580                // pop several levels if the list has no children.
581                let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) =
582                    pop_all_parents(
583                        lhs_next,
584                        rhs_next,
585                        Some(lhs_syntax.id()),
586                        Some(rhs_syntax.id()),
587                        &parents_next,
588                        alloc,
589                    );
590
591                neighbours.push((
592                    EnterUnchangedDelimiter { depth_difference },
593                    allocate_if_new(
594                        Vertex {
595                            neighbours: RefCell::new(None),
596                            predecessor: Cell::new(None),
597                            lhs_syntax,
598                            rhs_syntax,
599                            parents,
600                            lhs_parent_id,
601                            rhs_parent_id,
602                        },
603                        alloc,
604                        seen,
605                    ),
606                ));
607            }
608        }
609
610        if let (
611            Syntax::Atom {
612                content: lhs_content,
613                kind: lhs_kind @ AtomKind::Comment | lhs_kind @ AtomKind::String(_),
614                ..
615            },
616            Syntax::Atom {
617                content: rhs_content,
618                kind: rhs_kind @ AtomKind::Comment | rhs_kind @ AtomKind::String(_),
619                ..
620            },
621        ) = (lhs_syntax, rhs_syntax)
622        {
623            // Both sides are comments/both sides are strings and
624            // their content is reasonably similar.
625            if lhs_kind == rhs_kind && lhs_content != rhs_content {
626                let levenshtein_pct =
627                    (normalized_levenshtein(lhs_content, rhs_content) * 100.0).round() as u8;
628                let edge = if lhs_kind == &AtomKind::Comment {
629                    ReplacedComment { levenshtein_pct }
630                } else {
631                    ReplacedString { levenshtein_pct }
632                };
633
634                let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) =
635                    pop_all_parents(
636                        lhs_syntax.next_sibling(),
637                        rhs_syntax.next_sibling(),
638                        v.lhs_parent_id,
639                        v.rhs_parent_id,
640                        &v.parents,
641                        alloc,
642                    );
643                neighbours.push((
644                    edge,
645                    allocate_if_new(
646                        Vertex {
647                            neighbours: RefCell::new(None),
648                            predecessor: Cell::new(None),
649                            lhs_syntax,
650                            rhs_syntax,
651                            parents,
652                            lhs_parent_id,
653                            rhs_parent_id,
654                        },
655                        alloc,
656                        seen,
657                    ),
658                ));
659            }
660        }
661    }
662
663    if let Some(lhs_syntax) = &v.lhs_syntax {
664        match lhs_syntax {
665            // Step over this novel atom.
666            Syntax::Atom { .. } => {
667                let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) =
668                    pop_all_parents(
669                        lhs_syntax.next_sibling(),
670                        v.rhs_syntax,
671                        v.lhs_parent_id,
672                        v.rhs_parent_id,
673                        &v.parents,
674                        alloc,
675                    );
676
677                neighbours.push((
678                    NovelAtomLHS {},
679                    allocate_if_new(
680                        Vertex {
681                            neighbours: RefCell::new(None),
682                            predecessor: Cell::new(None),
683                            lhs_syntax,
684                            rhs_syntax,
685                            parents,
686                            lhs_parent_id,
687                            rhs_parent_id,
688                        },
689                        alloc,
690                        seen,
691                    ),
692                ));
693            }
694            // Step into this partially/fully novel list.
695            Syntax::List { children, .. } => {
696                let lhs_next = children.first().copied();
697
698                let parents_next = push_lhs_delimiter(&v.parents, lhs_syntax, alloc);
699
700                let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) =
701                    pop_all_parents(
702                        lhs_next,
703                        v.rhs_syntax,
704                        Some(lhs_syntax.id()),
705                        v.rhs_parent_id,
706                        &parents_next,
707                        alloc,
708                    );
709
710                neighbours.push((
711                    EnterNovelDelimiterLHS {},
712                    allocate_if_new(
713                        Vertex {
714                            neighbours: RefCell::new(None),
715                            predecessor: Cell::new(None),
716                            lhs_syntax,
717                            rhs_syntax,
718                            parents,
719                            lhs_parent_id,
720                            rhs_parent_id,
721                        },
722                        alloc,
723                        seen,
724                    ),
725                ));
726            }
727        }
728    }
729
730    if let Some(rhs_syntax) = &v.rhs_syntax {
731        match rhs_syntax {
732            // Step over this novel atom.
733            Syntax::Atom { .. } => {
734                let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) =
735                    pop_all_parents(
736                        v.lhs_syntax,
737                        rhs_syntax.next_sibling(),
738                        v.lhs_parent_id,
739                        v.rhs_parent_id,
740                        &v.parents,
741                        alloc,
742                    );
743
744                neighbours.push((
745                    NovelAtomRHS {},
746                    allocate_if_new(
747                        Vertex {
748                            neighbours: RefCell::new(None),
749                            predecessor: Cell::new(None),
750                            lhs_syntax,
751                            rhs_syntax,
752                            parents,
753                            lhs_parent_id,
754                            rhs_parent_id,
755                        },
756                        alloc,
757                        seen,
758                    ),
759                ));
760            }
761            // Step into this partially/fully novel list.
762            Syntax::List { children, .. } => {
763                let rhs_next = children.first().copied();
764                let parents_next = push_rhs_delimiter(&v.parents, rhs_syntax, alloc);
765
766                let (lhs_syntax, rhs_syntax, lhs_parent_id, rhs_parent_id, parents) =
767                    pop_all_parents(
768                        v.lhs_syntax,
769                        rhs_next,
770                        v.lhs_parent_id,
771                        Some(rhs_syntax.id()),
772                        &parents_next,
773                        alloc,
774                    );
775
776                neighbours.push((
777                    EnterNovelDelimiterRHS {},
778                    allocate_if_new(
779                        Vertex {
780                            neighbours: RefCell::new(None),
781                            predecessor: Cell::new(None),
782                            lhs_syntax,
783                            rhs_syntax,
784                            parents,
785                            lhs_parent_id,
786                            rhs_parent_id,
787                        },
788                        alloc,
789                        seen,
790                    ),
791                ));
792            }
793        }
794    }
795    assert!(
796        !neighbours.is_empty(),
797        "Must always find some next steps if node is not the end"
798    );
799
800    v.neighbours
801        .replace(Some(alloc.alloc_slice_copy(neighbours.as_slice())));
802}
803
804pub fn populate_change_map<'s, 'v>(
805    route: &[(Edge, &'v Vertex<'s, 'v>)],
806    change_map: &mut ChangeMap<'s>,
807) {
808    for (e, v) in route {
809        match e {
810            UnchangedNode { .. } => {
811                // No change on this node or its children.
812                let lhs = v.lhs_syntax.unwrap();
813                let rhs = v.rhs_syntax.unwrap();
814
815                insert_deep_unchanged(lhs, rhs, change_map);
816                insert_deep_unchanged(rhs, lhs, change_map);
817            }
818            EnterUnchangedDelimiter { .. } => {
819                // No change on the outer delimiter, but children may
820                // have changed.
821                let lhs = v.lhs_syntax.unwrap();
822                let rhs = v.rhs_syntax.unwrap();
823                change_map.insert(lhs, ChangeKind::Unchanged(rhs));
824                change_map.insert(rhs, ChangeKind::Unchanged(lhs));
825            }
826            ReplacedComment { levenshtein_pct } | ReplacedString { levenshtein_pct } => {
827                let lhs = v.lhs_syntax.unwrap();
828                let rhs = v.rhs_syntax.unwrap();
829                let change_kind = |first, second| {
830                    if let ReplacedComment { .. } = e {
831                        ChangeKind::ReplacedComment(first, second)
832                    } else {
833                        ChangeKind::ReplacedString(first, second)
834                    }
835                };
836
837                // NOTE: 20% Levenshtein threshold. If levenshtein_pct <= 20,
838                // replaced comments/strings are treated as Novel (Delete+Insert)
839                // instead of ReplacedComment/ReplacedString (Update).
840                // This is difftastic's hard-coded behavior.
841                if *levenshtein_pct > 20 {
842                    change_map.insert(lhs, change_kind(lhs, rhs));
843                    change_map.insert(rhs, change_kind(rhs, lhs));
844                } else {
845                    change_map.insert(lhs, ChangeKind::Novel);
846                    change_map.insert(rhs, ChangeKind::Novel);
847                }
848            }
849            NovelAtomLHS { .. } | EnterNovelDelimiterLHS { .. } => {
850                let lhs = v.lhs_syntax.unwrap();
851                change_map.insert(lhs, ChangeKind::Novel);
852            }
853            NovelAtomRHS { .. } | EnterNovelDelimiterRHS { .. } => {
854                let rhs = v.rhs_syntax.unwrap();
855                change_map.insert(rhs, ChangeKind::Novel);
856            }
857        }
858    }
859}