facet_diff/
tree.rs

1//! Tree diffing for Facet types using the cinereus algorithm.
2//!
3//! This module provides the bridge between facet-reflect's `Peek` and
4//! cinereus's tree diffing algorithm.
5
6use core::hash::Hasher;
7use std::borrow::Cow;
8use std::hash::DefaultHasher;
9
10use cinereus::{EditOp as CinereusEditOp, MatchingConfig, NodeData, Tree, diff_trees};
11use facet_core::{Def, StructKind, Type, UserType};
12use facet_diff_core::{Path, PathSegment};
13use facet_reflect::{HasFields, Peek};
14
15/// The kind of a node in the tree (for type-based matching).
16#[derive(Debug, Clone, PartialEq, Eq, Hash)]
17pub enum NodeKind {
18    /// A struct with the given type name
19    Struct(&'static str),
20    /// An enum variant
21    EnumVariant(&'static str, &'static str), // (enum_name, variant_name)
22    /// A list/array/slice
23    List(&'static str),
24    /// A map
25    Map(&'static str),
26    /// An option
27    Option(&'static str),
28    /// A scalar value
29    Scalar(&'static str),
30}
31
32/// Label for a node (the actual value for leaves).
33#[derive(Debug, Clone, PartialEq, Eq)]
34pub struct NodeLabel {
35    /// The path to this node from the root.
36    pub path: Path,
37}
38
39/// An edit operation in the diff.
40#[derive(Debug, Clone, PartialEq, Eq)]
41#[non_exhaustive]
42pub enum EditOp {
43    /// A value was updated (matched but content differs).
44    Update {
45        /// The path to the updated node
46        path: Path,
47        /// Hash of the old value
48        old_hash: u64,
49        /// Hash of the new value
50        new_hash: u64,
51    },
52    /// A node was inserted in tree B.
53    Insert {
54        /// The path where the node was inserted
55        path: Path,
56        /// Hash of the inserted value
57        hash: u64,
58    },
59    /// A node was deleted from tree A.
60    Delete {
61        /// The path where the node was deleted
62        path: Path,
63        /// Hash of the deleted value
64        hash: u64,
65    },
66    /// A node was moved from one location to another.
67    Move {
68        /// The original path
69        old_path: Path,
70        /// The new path
71        new_path: Path,
72        /// Hash of the moved value
73        hash: u64,
74    },
75}
76
77/// A tree built from a Peek value, ready for diffing.
78pub type FacetTree = Tree<NodeKind, NodeLabel>;
79
80/// Build a cinereus tree from a Peek value.
81pub fn build_tree<'mem, 'facet>(peek: Peek<'mem, 'facet>) -> FacetTree {
82    let mut builder = TreeBuilder::new();
83    let root_id = builder.build_node(peek, Path::new());
84    Tree {
85        arena: builder.arena,
86        root: root_id,
87    }
88}
89
90struct TreeBuilder {
91    arena: cinereus::indextree::Arena<NodeData<NodeKind, NodeLabel>>,
92}
93
94impl TreeBuilder {
95    fn new() -> Self {
96        Self {
97            arena: cinereus::indextree::Arena::new(),
98        }
99    }
100
101    fn build_node<'mem, 'facet>(
102        &mut self,
103        peek: Peek<'mem, 'facet>,
104        path: Path,
105    ) -> cinereus::indextree::NodeId {
106        // Compute structural hash
107        let mut hasher = DefaultHasher::new();
108        peek.structural_hash(&mut hasher);
109        let hash = hasher.finish();
110
111        // Determine the node kind
112        let kind = self.determine_kind(peek);
113
114        // Create node data
115        let data = NodeData {
116            hash,
117            kind,
118            label: Some(NodeLabel { path: path.clone() }),
119        };
120
121        // Create the node
122        let node_id = self.arena.new_node(data);
123
124        // Build children based on type
125        self.build_children(peek, node_id, path);
126
127        node_id
128    }
129
130    fn determine_kind<'mem, 'facet>(&self, peek: Peek<'mem, 'facet>) -> NodeKind {
131        match peek.shape().ty {
132            Type::User(UserType::Struct(_)) => NodeKind::Struct(peek.shape().type_identifier),
133            Type::User(UserType::Enum(_)) => {
134                if let Ok(e) = peek.into_enum()
135                    && let Ok(variant) = e.active_variant()
136                {
137                    return NodeKind::EnumVariant(peek.shape().type_identifier, variant.name);
138                }
139                NodeKind::Scalar(peek.shape().type_identifier)
140            }
141            _ => match peek.shape().def {
142                Def::List(_) | Def::Array(_) | Def::Slice(_) => {
143                    NodeKind::List(peek.shape().type_identifier)
144                }
145                Def::Map(_) => NodeKind::Map(peek.shape().type_identifier),
146                Def::Option(_) => NodeKind::Option(peek.shape().type_identifier),
147                _ => NodeKind::Scalar(peek.shape().type_identifier),
148            },
149        }
150    }
151
152    fn build_children<'mem, 'facet>(
153        &mut self,
154        peek: Peek<'mem, 'facet>,
155        parent_id: cinereus::indextree::NodeId,
156        path: Path,
157    ) {
158        match peek.shape().ty {
159            Type::User(UserType::Struct(_)) => {
160                if let Ok(s) = peek.into_struct() {
161                    for (field, field_peek) in s.fields() {
162                        // Skip metadata fields
163                        if field.is_metadata() {
164                            continue;
165                        }
166                        let child_path = path.with(PathSegment::Field(Cow::Borrowed(field.name)));
167                        let child_id = self.build_node(field_peek, child_path);
168                        parent_id.append(child_id, &mut self.arena);
169                    }
170                }
171            }
172            Type::User(UserType::Enum(_)) => {
173                if let Ok(e) = peek.into_enum()
174                    && let Ok(variant) = e.active_variant()
175                {
176                    let variant_path = path.with(PathSegment::Variant(Cow::Borrowed(variant.name)));
177                    for (i, (field, field_peek)) in e.fields().enumerate() {
178                        let child_path = if variant.data.kind == StructKind::Struct {
179                            variant_path.with(PathSegment::Field(Cow::Borrowed(field.name)))
180                        } else {
181                            variant_path.with(PathSegment::Index(i))
182                        };
183                        let child_id = self.build_node(field_peek, child_path);
184                        parent_id.append(child_id, &mut self.arena);
185                    }
186                }
187            }
188            _ => {
189                match peek.shape().def {
190                    Def::List(_) | Def::Array(_) | Def::Slice(_) => {
191                        if let Ok(list) = peek.into_list_like() {
192                            for (i, elem) in list.iter().enumerate() {
193                                let child_path = path.with(PathSegment::Index(i));
194                                let child_id = self.build_node(elem, child_path);
195                                parent_id.append(child_id, &mut self.arena);
196                            }
197                        }
198                    }
199                    Def::Map(_) => {
200                        if let Ok(map) = peek.into_map() {
201                            for (key, value) in map.iter() {
202                                let key_str = format!("{:?}", key);
203                                let child_path = path.with(PathSegment::Key(Cow::Owned(key_str)));
204                                let child_id = self.build_node(value, child_path);
205                                parent_id.append(child_id, &mut self.arena);
206                            }
207                        }
208                    }
209                    Def::Option(_) => {
210                        if let Ok(opt) = peek.into_option()
211                            && let Some(inner) = opt.value()
212                        {
213                            // For options, the child keeps the same path
214                            let child_id = self.build_node(inner, path);
215                            parent_id.append(child_id, &mut self.arena);
216                        }
217                    }
218                    _ => {
219                        // Scalar/leaf node - no children
220                    }
221                }
222            }
223        }
224    }
225}
226
227/// Compute the tree diff between two Facet values.
228pub fn tree_diff<'a, 'f, A: facet_core::Facet<'f>, B: facet_core::Facet<'f>>(
229    a: &'a A,
230    b: &'a B,
231) -> Vec<EditOp> {
232    let peek_a = Peek::new(a);
233    let peek_b = Peek::new(b);
234
235    let tree_a = build_tree(peek_a);
236    let tree_b = build_tree(peek_b);
237
238    let config = MatchingConfig::default();
239    let cinereus_ops = diff_trees(&tree_a, &tree_b, &config);
240
241    // Convert cinereus ops to our EditOp format, filtering out no-op moves
242    cinereus_ops
243        .into_iter()
244        .map(|op| convert_op(op, &tree_a, &tree_b))
245        .filter(|op| {
246            // Filter out MOVE operations where old and new paths are the same
247            // (these are no-ops from the user's perspective)
248            if let EditOp::Move {
249                old_path, new_path, ..
250            } = op
251            {
252                old_path != new_path
253            } else {
254                true
255            }
256        })
257        .collect()
258}
259
260fn convert_op(
261    op: CinereusEditOp<NodeKind, NodeLabel>,
262    tree_a: &FacetTree,
263    tree_b: &FacetTree,
264) -> EditOp {
265    match op {
266        CinereusEditOp::Update {
267            node_a,
268            node_b,
269            old_label,
270            new_label: _,
271        } => {
272            let path = old_label.map(|l| l.path).unwrap_or_else(Path::new);
273            EditOp::Update {
274                path,
275                old_hash: tree_a.get(node_a).hash,
276                new_hash: tree_b.get(node_b).hash,
277            }
278        }
279        CinereusEditOp::Insert { node_b, label, .. } => {
280            let path = label.map(|l| l.path).unwrap_or_else(Path::new);
281            EditOp::Insert {
282                path,
283                hash: tree_b.get(node_b).hash,
284            }
285        }
286        CinereusEditOp::Delete { node_a } => {
287            let data = tree_a.get(node_a);
288            let path = data
289                .label
290                .as_ref()
291                .map(|l| l.path.clone())
292                .unwrap_or_default();
293            EditOp::Delete {
294                path,
295                hash: data.hash,
296            }
297        }
298        CinereusEditOp::Move { node_a, node_b, .. } => {
299            let old_path = tree_a
300                .get(node_a)
301                .label
302                .as_ref()
303                .map(|l| l.path.clone())
304                .unwrap_or_default();
305            let new_path = tree_b
306                .get(node_b)
307                .label
308                .as_ref()
309                .map(|l| l.path.clone())
310                .unwrap_or_default();
311            EditOp::Move {
312                old_path,
313                new_path,
314                hash: tree_b.get(node_b).hash,
315            }
316        }
317    }
318}
319
320#[cfg(test)]
321mod tests {
322    use super::*;
323    use facet::Facet;
324
325    #[derive(Debug, Clone, PartialEq, Facet)]
326    struct Person {
327        name: String,
328        age: u32,
329    }
330
331    #[test]
332    fn test_identical_trees() {
333        let a = Person {
334            name: "Alice".into(),
335            age: 30,
336        };
337        let b = a.clone();
338
339        let ops = tree_diff(&a, &b);
340        assert!(ops.is_empty(), "Identical trees should have no edits");
341    }
342
343    #[test]
344    fn test_simple_update() {
345        let a = Person {
346            name: "Alice".into(),
347            age: 30,
348        };
349        let b = Person {
350            name: "Alice".into(),
351            age: 31,
352        };
353
354        let ops = tree_diff(&a, &b);
355        assert!(!ops.is_empty(), "Changed values should have edits");
356    }
357
358    #[test]
359    fn test_tree_building() {
360        let person = Person {
361            name: "Alice".into(),
362            age: 30,
363        };
364
365        let peek = Peek::new(&person);
366        let tree = build_tree(peek);
367
368        // Should have root + 2 fields (at minimum)
369        let node_count = tree.arena.count();
370        assert!(
371            node_count >= 3,
372            "Tree should have root and field nodes, got {}",
373            node_count
374        );
375    }
376}
377
378/// Result of computing similarity between two Peek values using tree diff.
379#[derive(Debug, Clone)]
380pub struct SimilarityResult<'mem, 'facet> {
381    /// Similarity score between 0.0 and 1.0
382    pub score: f64,
383    /// The edit operations if similarity is above threshold
384    pub edit_ops: Vec<EditOp>,
385    /// The first Peek value (from)
386    pub peek_a: Peek<'mem, 'facet>,
387    /// The second Peek value (to)
388    pub peek_b: Peek<'mem, 'facet>,
389}
390
391impl<'mem, 'facet> SimilarityResult<'mem, 'facet> {
392    /// Check if the elements are similar enough to be considered a match
393    pub fn is_similar(&self, threshold: f64) -> bool {
394        self.score >= threshold
395    }
396
397    /// Check if the elements are identical (score = 1.0)
398    pub fn is_identical(&self) -> bool {
399        self.score >= 1.0 - f64::EPSILON
400    }
401}
402
403/// Compute structural similarity between two Peek values using tree diff.
404///
405/// This uses the cinereus GumTree algorithm to:
406/// 1. Build trees from both Peek values
407/// 2. Compute a matching between nodes (hash-based + Dice coefficient)
408/// 3. Return a similarity score based on how many nodes matched
409///
410/// The similarity score is: `matched_nodes / max(nodes_a, nodes_b)`
411///
412/// # Arguments
413/// * `peek_a` - First value to compare
414/// * `peek_b` - Second value to compare
415/// * `config` - Optional matching configuration (uses defaults if None)
416///
417/// # Returns
418/// A `SimilarityResult` containing the score and edit operations
419pub fn compute_element_similarity<'mem, 'facet>(
420    peek_a: Peek<'mem, 'facet>,
421    peek_b: Peek<'mem, 'facet>,
422    config: Option<&MatchingConfig>,
423) -> SimilarityResult<'mem, 'facet> {
424    let tree_a = build_tree(peek_a);
425    let tree_b = build_tree(peek_b);
426
427    let default_config = MatchingConfig::default();
428    let config = config.unwrap_or(&default_config);
429
430    let matching = cinereus::compute_matching(&tree_a, &tree_b, config);
431
432    // Count nodes in each tree
433    let nodes_a = tree_a.arena.count();
434    let nodes_b = tree_b.arena.count();
435    let max_nodes = nodes_a.max(nodes_b);
436
437    // Similarity score: proportion of nodes that matched
438    let score = if max_nodes == 0 {
439        1.0 // Both empty = identical
440    } else {
441        matching.len() as f64 / max_nodes as f64
442    };
443
444    // Generate edit operations
445    let cinereus_ops = diff_trees(&tree_a, &tree_b, config);
446    let edit_ops = cinereus_ops
447        .into_iter()
448        .map(|op| convert_op(op, &tree_a, &tree_b))
449        .filter(|op| {
450            // Filter out no-op moves
451            if let EditOp::Move {
452                old_path, new_path, ..
453            } = op
454            {
455                old_path != new_path
456            } else {
457                true
458            }
459        })
460        .collect();
461
462    SimilarityResult {
463        score,
464        edit_ops,
465        peek_a,
466        peek_b,
467    }
468}
469
470/// Check if two sequence elements should be paired based on structural similarity.
471///
472/// This is a convenience function for sequence diffing that returns true
473/// if the elements are similar enough to be shown as a modification rather
474/// than a removal+addition.
475///
476/// # Arguments
477/// * `peek_a` - First element
478/// * `peek_b` - Second element
479/// * `threshold` - Minimum similarity score (0.0 to 1.0), recommended 0.5-0.7
480pub fn elements_are_similar<'mem, 'facet>(
481    peek_a: Peek<'mem, 'facet>,
482    peek_b: Peek<'mem, 'facet>,
483    threshold: f64,
484) -> bool {
485    let result = compute_element_similarity(peek_a, peek_b, None);
486    result.is_similar(threshold)
487}