mergiraf/
class_mapping.rs

1use std::{fmt::Display, hash::Hash};
2
3use itertools::Itertools;
4use rustc_hash::FxHashMap;
5
6use crate::{matching::Matching, pcs::Revision, tree::AstNode};
7
8/// A node together with a marker of which revision it came from.
9#[derive(Debug, Copy, Clone)]
10pub struct RevNode<'a> {
11    pub rev: Revision,
12    pub node: &'a AstNode<'a>,
13}
14
15/// A node at a revision, which happens to be the leader of its class
16/// in a class-mapping.
17#[derive(Debug, Copy, Clone, PartialEq, Hash)]
18pub struct Leader<'a>(RevNode<'a>);
19
20impl PartialEq for RevNode<'_> {
21    fn eq(&self, other: &Self) -> bool {
22        // because we know the nodes are from the same revision, it's safe to compare them just by their ids
23        self.rev == other.rev && self.node.id == other.node.id
24    }
25}
26
27impl Eq for RevNode<'_> {}
28impl Eq for Leader<'_> {}
29
30impl<'a> RevNode<'a> {
31    pub fn new(rev: Revision, node: &'a AstNode<'a>) -> Self {
32        RevNode { rev, node }
33    }
34
35    /// Whether the subtree rooted at this node contains another node (up to class mapping).
36    pub fn contains(&self, other: &Leader<'a>, class_mapping: &ClassMapping<'a>) -> bool {
37        self.node.dfs().any(|descendant| {
38            class_mapping.map_to_leader(RevNode::new(self.rev, descendant)) == *other
39        })
40    }
41}
42
43impl<'a> Leader<'a> {
44    /// Returns the leader as one of the class representatives.
45    /// Uses of this method are generally suspicious, because this is an arbitrary choice
46    /// of class representative. It is preferable to choose the representative based on
47    /// the revision it belongs to.
48    pub fn as_representative(&self) -> RevNode<'a> {
49        self.0
50    }
51
52    /// The type of this node, which is guaranteed to be the same for all representatives
53    /// of this leader.
54    pub fn grammar_name(&self) -> &'static str {
55        self.0.node.grammar_name
56    }
57}
58
59impl Display for RevNode<'_> {
60    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61        write!(
62            f,
63            "{}:{}…{}@{}",
64            self.node.grammar_name, self.node.byte_range.start, self.node.byte_range.end, self.rev
65        )
66    }
67}
68
69impl Display for Leader<'_> {
70    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71        write!(f, "{}", self.0)
72    }
73}
74
75impl Hash for RevNode<'_> {
76    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
77        self.rev.hash(state);
78        self.node.id.hash(state);
79    }
80}
81
82/// Creates classes of nodes across multiple revisions so that
83/// they can be equated when converting the corresponding trees
84/// to PCS, following the 3DM-Merge algorithm from Spork
85#[derive(Debug, Default)]
86pub struct ClassMapping<'a> {
87    map: FxHashMap<RevNode<'a>, Leader<'a>>,
88    representatives: FxHashMap<Leader<'a>, FxHashMap<Revision, RevNode<'a>>>,
89    exact_matchings: FxHashMap<Leader<'a>, i8>,
90    empty_repr: FxHashMap<Revision, RevNode<'a>>, // stays empty (only there for ownership purposes)
91}
92
93impl<'a> ClassMapping<'a> {
94    /// Creates an empty class mapping.
95    pub fn new() -> Self {
96        Self::default()
97    }
98
99    /// Adds a matching to the mapping. The `from_rev` indicates the revision that's on the left hand side of the mapping.
100    /// The `to_rev` indicates the revision that's on the right hand side of the matching.
101    /// We only add mappings for nodes which are previously not matched.
102    /// The `is_exact` parameters indicates if two nodes being matched indicates that they are isomorphic.
103    pub fn add_matching(
104        &mut self,
105        matching: &Matching<'a>,
106        from_rev: Revision,
107        to_rev: Revision,
108        is_exact: bool,
109    ) {
110        for (right_node, left_match) in matching.iter_right_to_left() {
111            let key = RevNode::new(to_rev, right_node);
112            let left_rev_node = RevNode::new(from_rev, left_match);
113            let leader = *self
114                .map
115                .entry(left_rev_node)
116                .or_insert(Leader(left_rev_node));
117            self.map.insert(key, leader);
118            let repr = self.representatives.entry(leader).or_default();
119            // keep track of exact matchings
120            if is_exact && !repr.contains_key(&to_rev) {
121                let exacts = self.exact_matchings.entry(leader).or_default();
122                *exacts += 1;
123            }
124            repr.insert(to_rev, key);
125            repr.insert(from_rev, left_rev_node);
126        }
127    }
128
129    /// Are the representatives of this leader all isomorphic?
130    /// In this case, it's not worth trying to merge their contents.
131    pub fn is_isomorphic_in_all_revisions(&self, leader: Leader<'a>) -> bool {
132        // if we know that at least two isomorphisms exist in the cluster, then by transitivity there are three of them
133        // and all revisions are isomorphic for this node
134        *self.exact_matchings.get(&leader).unwrap_or(&0) >= 2
135    }
136
137    /// Maps a node from some revision to its class representative
138    pub fn map_to_leader(&self, rev_node: RevNode<'a>) -> Leader<'a> {
139        self.map.get(&rev_node).copied().unwrap_or(Leader(rev_node))
140    }
141
142    /// Finds all the representatives in a cluster designated by its leader.
143    /// This can return an empty map if the cluster only contains this node!
144    fn internal_representatives(&self, leader: Leader<'a>) -> &FxHashMap<Revision, RevNode<'a>> {
145        self.representatives
146            .get(&leader)
147            .unwrap_or(&self.empty_repr)
148    }
149
150    /// The set of revisions for which we have a representative for this leader
151    pub fn revision_set(&self, leader: Leader<'a>) -> RevisionNESet {
152        let mut set = RevisionNESet::singleton(leader.0.rev);
153        self.internal_representatives(leader)
154            .keys()
155            .for_each(|k| set.add(*k));
156        set
157    }
158
159    /// The set of representatives for this leader
160    pub fn representatives(&self, leader: Leader<'a>) -> Vec<RevNode<'a>> {
161        let mut vec = self
162            .internal_representatives(leader)
163            .values()
164            .copied()
165            .collect_vec();
166        if vec.is_empty() {
167            vec.push(leader.as_representative());
168        }
169        vec
170    }
171
172    /// The list of children of the representative of the given leader at that revision,
173    /// represented themselves as leaders of their own clusters.
174    pub fn children_at_revision(
175        &self,
176        leader: Leader<'a>,
177        revision: Revision,
178    ) -> Option<Vec<Leader<'a>>> {
179        let repr = if leader.0.rev == revision {
180            &leader.0
181        } else {
182            self.internal_representatives(leader).get(&revision)?
183        };
184        Some(
185            repr.node
186                .children
187                .iter()
188                .map(|c| self.map_to_leader(RevNode::new(revision, c)))
189                .collect_vec(),
190        )
191    }
192
193    /// The AST node corresponding to this leader at a given revision
194    pub fn node_at_rev(
195        &self,
196        leader: Leader<'a>,
197        picked_revision: Revision,
198    ) -> Option<&'a AstNode<'a>> {
199        if leader.0.rev == picked_revision {
200            Some(leader.0.node)
201        } else {
202            self.internal_representatives(leader)
203                .get(&picked_revision)
204                .map(|rn| rn.node)
205        }
206    }
207
208    /// Checks whether the supplied revison (left or right) is only reformatting
209    /// the source (the unindented sources are different as strings but the trees are
210    /// isomorphic)
211    pub fn is_reformatting(&self, leader: Leader<'a>, revision: Revision) -> bool {
212        let base_source = self.node_at_rev(leader, Revision::Base);
213        let rev_source = self.node_at_rev(leader, revision);
214        if let (Some(base), Some(rev)) = (base_source, rev_source) {
215            base.hash == rev.hash && base.unindented_source() != rev.unindented_source()
216        } else {
217            false
218        }
219    }
220
221    /// Returns the field name from which a leader can be obtained from its parent.
222    /// In some cases it is possible that this field name differs from revision to revision.
223    /// We currently ignore this case and just return the first field name of any representative
224    /// of this leader.
225    pub fn field_name(&self, leader: Leader<'a>) -> Option<&'static str> {
226        leader.as_representative().node.field_name.or_else(|| {
227            self.internal_representatives(leader)
228                .iter()
229                .flat_map(|(_, node)| node.node.field_name.into_iter())
230                .next()
231        })
232    }
233}
234
235/// A set of [Revision]s
236#[derive(Debug, PartialEq, Eq, Copy, Clone, PartialOrd, Ord, Hash)]
237pub struct RevisionSet {
238    base: bool,
239    left: bool,
240    right: bool,
241}
242
243impl RevisionSet {
244    /// A set containing no revision
245    pub fn new() -> Self {
246        RevisionSet {
247            base: false,
248            left: false,
249            right: false,
250        }
251    }
252
253    /// Adds a revision to the set by modifying it
254    pub fn add(&mut self, revision: Revision) {
255        self.set(revision, true);
256    }
257
258    /// Adds a revision to the set by taking ownership
259    pub fn with(mut self, revision: Revision) -> RevisionSet {
260        self.add(revision);
261        self
262    }
263
264    /// Removes a revision from this set
265    pub fn remove(&mut self, revision: Revision) {
266        self.set(revision, false);
267    }
268
269    /// Sets whether the revision belongs to the set
270    pub fn set(&mut self, revision: Revision, presence: bool) {
271        match revision {
272            Revision::Base => self.base = presence,
273            Revision::Left => self.left = presence,
274            Revision::Right => self.right = presence,
275        }
276    }
277
278    /// Does this set of revisions contain the given revision?
279    pub fn contains(self, revision: Revision) -> bool {
280        match revision {
281            Revision::Base => self.base,
282            Revision::Left => self.left,
283            Revision::Right => self.right,
284        }
285    }
286
287    /// Set intersection
288    pub fn intersection(self, other: RevisionSet) -> RevisionSet {
289        RevisionSet {
290            base: self.base && other.base,
291            left: self.left && other.left,
292            right: self.right && other.right,
293        }
294    }
295
296    /// Returns any revision contained in the set,
297    /// by order of preference Left -> Right -> Base
298    pub fn any(self) -> Option<Revision> {
299        if self.left {
300            Some(Revision::Left)
301        } else if self.right {
302            Some(Revision::Right)
303        } else if self.base {
304            Some(Revision::Base)
305        } else {
306            None
307        }
308    }
309
310    pub fn is_empty(self) -> bool {
311        !(self.base || self.left || self.right)
312    }
313
314    /// Checked version of `is_empty`
315    pub fn as_nonempty(self) -> Option<RevisionNESet> {
316        if self.is_empty() {
317            None
318        } else {
319            Some(RevisionNESet(self))
320        }
321    }
322
323    pub fn is_full(self) -> bool {
324        self.base && self.left && self.right
325    }
326
327    /// Iterates on the revisions contained in this set (returned in decreasing priority)
328    pub fn iter(self) -> impl Iterator<Item = Revision> {
329        let mut vector = Vec::new();
330        if self.left {
331            vector.push(Revision::Left);
332        }
333        if self.right {
334            vector.push(Revision::Right);
335        }
336        if self.base {
337            vector.push(Revision::Base);
338        }
339        vector.into_iter()
340    }
341}
342
343impl Default for RevisionSet {
344    fn default() -> Self {
345        Self::new()
346    }
347}
348
349impl Display for RevisionSet {
350    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
351        write!(
352            f,
353            "/{}{}{}/",
354            if self.base { "B" } else { "." },
355            if self.left { "L" } else { "." },
356            if self.right { "R" } else { "." }
357        )
358    }
359}
360
361/// A non-empty [`RevisionSet`]
362#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)]
363pub struct RevisionNESet(RevisionSet);
364
365impl RevisionNESet {
366    /// Forget non-emptiness
367    pub fn set(self) -> RevisionSet {
368        self.0
369    }
370
371    /// A set containing a single revision
372    pub fn singleton(revision: Revision) -> Self {
373        let mut revisions = RevisionSet::new();
374        revisions.add(revision);
375        RevisionNESet(revisions)
376    }
377
378    /// Adds a revision to the set by modifying it
379    pub fn add(&mut self, revision: Revision) {
380        self.0.set(revision, true);
381    }
382
383    /// Adds a revision to the set by taking ownership
384    pub fn with(self, revision: Revision) -> RevisionNESet {
385        RevisionNESet(self.0.with(revision))
386    }
387
388    /// Does this set of revisions contain the given revision?
389    pub fn contains(self, revision: Revision) -> bool {
390        self.0.contains(revision)
391    }
392
393    /// Set intersection
394    pub fn intersection(self, other: RevisionSet) -> RevisionSet {
395        self.0.intersection(other)
396    }
397
398    /// Returns any revision contained in the set,
399    /// by order of preference Left -> Right -> Base
400    pub fn any(self) -> Revision {
401        self.0
402            .any()
403            .expect("RevisionNonEmptySet is actually empty, oops")
404    }
405
406    pub fn is_full(self) -> bool {
407        self.0.is_full()
408    }
409}
410
411impl Display for RevisionNESet {
412    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
413        self.0.fmt(f)
414    }
415}