use std::collections::HashSet;
use crate::value::DictionaryValue;
use crate::zipper::{DictZipper, ValuedDictZipper};
use crate::CharUnit;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValueDiff<U: CharUnit, V: DictionaryValue> {
pub path: Vec<U>,
pub left_value: V,
pub right_value: V,
}
impl<U: CharUnit, V: DictionaryValue> ValueDiff<U, V> {
pub fn new(path: Vec<U>, left_value: V, right_value: V) -> Self {
Self {
path,
left_value,
right_value,
}
}
pub fn path_string(&self) -> String {
U::to_string(&self.path)
}
}
#[derive(Clone, Debug)]
pub struct ValueDiffZipper<Z: ValuedDictZipper> {
left: Option<Z>,
right: Option<Z>,
path: Vec<Z::Unit>,
}
impl<Z: ValuedDictZipper> ValueDiffZipper<Z>
where
Z::Value: PartialEq,
{
pub fn new(left: Z, right: Z) -> Self {
Self {
left: Some(left),
right: Some(right),
path: Vec::new(),
}
}
pub fn both_active(&self) -> bool {
self.left.is_some() && self.right.is_some()
}
pub fn both_final(&self) -> bool {
self.left.as_ref().is_some_and(|z| z.is_final())
&& self.right.as_ref().is_some_and(|z| z.is_final())
}
pub fn left_value(&self) -> Option<Z::Value> {
self.left
.as_ref()
.filter(|z| z.is_final())
.and_then(|z| z.value())
}
pub fn right_value(&self) -> Option<Z::Value> {
self.right
.as_ref()
.filter(|z| z.is_final())
.and_then(|z| z.value())
}
pub fn values(&self) -> Option<(Z::Value, Z::Value)> {
match (self.left_value(), self.right_value()) {
(Some(l), Some(r)) => Some((l, r)),
_ => None,
}
}
pub fn values_differ(&self) -> bool {
match self.values() {
Some((l, r)) => l != r,
None => false,
}
}
pub fn iter_diffs(&self) -> ValueDiffIterator<Z> {
ValueDiffIterator::new(self.clone())
}
}
impl<Z: ValuedDictZipper> DictZipper for ValueDiffZipper<Z>
where
Z::Value: PartialEq,
{
type Unit = Z::Unit;
fn is_final(&self) -> bool {
self.values_differ()
}
fn descend(&self, label: Self::Unit) -> Option<Self> {
let new_left = self.left.as_ref().and_then(|z| z.descend(label));
let new_right = self.right.as_ref().and_then(|z| z.descend(label));
if new_left.is_some() && new_right.is_some() {
let mut new_path = self.path.clone();
new_path.push(label);
Some(Self {
left: new_left,
right: new_right,
path: new_path,
})
} else {
None
}
}
fn children(&self) -> impl Iterator<Item = (Self::Unit, Self)> {
let left_labels: HashSet<Z::Unit> = self
.left
.as_ref()
.map(|z| z.children().map(|(label, _)| label).collect())
.unwrap_or_default();
let right_labels: HashSet<Z::Unit> = self
.right
.as_ref()
.map(|z| z.children().map(|(label, _)| label).collect())
.unwrap_or_default();
let common_labels: Vec<Z::Unit> = {
let mut labels: Vec<_> = left_labels.intersection(&right_labels).copied().collect();
labels.sort_by(|a, b| format!("{:?}", a).cmp(&format!("{:?}", b)));
labels
};
let self_clone = self.clone();
common_labels
.into_iter()
.filter_map(move |label| self_clone.descend(label).map(|child| (label, child)))
}
fn path(&self) -> Vec<Self::Unit> {
self.path.clone()
}
}
pub struct ValueDiffIterator<Z: ValuedDictZipper> {
stack: Vec<ValueDiffZipper<Z>>,
seen: HashSet<Vec<Z::Unit>>,
}
impl<Z: ValuedDictZipper> ValueDiffIterator<Z>
where
Z::Value: PartialEq,
{
fn new(zipper: ValueDiffZipper<Z>) -> Self {
let mut stack = Vec::with_capacity(16);
stack.push(zipper);
Self {
stack,
seen: HashSet::new(),
}
}
}
impl<Z: ValuedDictZipper> Iterator for ValueDiffIterator<Z>
where
Z::Value: PartialEq,
{
type Item = ValueDiff<Z::Unit, Z::Value>;
fn next(&mut self) -> Option<Self::Item> {
while let Some(zipper) = self.stack.pop() {
for (_label, child) in zipper.children() {
self.stack.push(child);
}
if zipper.is_final() {
let path = zipper.path();
if self.seen.insert(path.clone()) {
if let Some((left_value, right_value)) = zipper.values() {
return Some(ValueDiff::new(path, left_value, right_value));
}
}
}
}
None
}
}
pub trait ValueDiffZipperExt: ValuedDictZipper + Sized
where
Self::Value: PartialEq,
{
fn value_diff_with(self, other: Self) -> ValueDiffZipper<Self> {
ValueDiffZipper::new(self, other)
}
fn iter_value_diffs(self, other: Self) -> ValueDiffIterator<Self> {
ValueDiffZipper::new(self, other).iter_diffs()
}
}
impl<Z: ValuedDictZipper> ValueDiffZipperExt for Z where Z::Value: PartialEq {}
#[cfg(test)]
mod tests {
use super::*;
use crate::double_array_trie::DoubleArrayTrie;
use crate::double_array_trie_zipper::DoubleArrayTrieZipper;
fn sorted_diffs<U: CharUnit + Ord, V: DictionaryValue + Ord>(
mut diffs: Vec<ValueDiff<U, V>>,
) -> Vec<ValueDiff<U, V>> {
diffs.sort_by(|a, b| a.path.cmp(&b.path));
diffs
}
#[test]
fn test_value_diff_basic() {
let dict1 = DoubleArrayTrie::from_terms_with_values(
vec![("cat", 10usize), ("dog", 20), ("fish", 30)].into_iter(),
);
let dict2 = DoubleArrayTrie::from_terms_with_values(
vec![("cat", 10usize), ("dog", 25), ("fish", 35)].into_iter(),
);
let z1 = DoubleArrayTrieZipper::new_from_dict(&dict1);
let z2 = DoubleArrayTrieZipper::new_from_dict(&dict2);
let diff = ValueDiffZipper::new(z1, z2);
let results = sorted_diffs(diff.iter_diffs().collect());
assert_eq!(results.len(), 2);
assert_eq!(results[0].path, b"dog".to_vec());
assert_eq!(results[0].left_value, 20);
assert_eq!(results[0].right_value, 25);
assert_eq!(results[1].path, b"fish".to_vec());
assert_eq!(results[1].left_value, 30);
assert_eq!(results[1].right_value, 35);
}
#[test]
fn test_value_diff_identical() {
let dict1 = DoubleArrayTrie::from_terms_with_values(
vec![("cat", 10usize), ("dog", 20)].into_iter(),
);
let dict2 = DoubleArrayTrie::from_terms_with_values(
vec![("cat", 10usize), ("dog", 20)].into_iter(),
);
let z1 = DoubleArrayTrieZipper::new_from_dict(&dict1);
let z2 = DoubleArrayTrieZipper::new_from_dict(&dict2);
let diff = z1.value_diff_with(z2);
let count = diff.iter_diffs().count();
assert_eq!(count, 0);
}
#[test]
fn test_value_diff_all_different() {
let dict1 = DoubleArrayTrie::from_terms_with_values(
vec![("a", 1usize), ("b", 2), ("c", 3)].into_iter(),
);
let dict2 = DoubleArrayTrie::from_terms_with_values(
vec![("a", 10usize), ("b", 20), ("c", 30)].into_iter(),
);
let z1 = DoubleArrayTrieZipper::new_from_dict(&dict1);
let z2 = DoubleArrayTrieZipper::new_from_dict(&dict2);
let diff = z1.value_diff_with(z2);
let count = diff.iter_diffs().count();
assert_eq!(count, 3);
}
#[test]
fn test_value_diff_disjoint_dicts() {
let dict1 =
DoubleArrayTrie::from_terms_with_values(vec![("cat", 1usize), ("dog", 2)].into_iter());
let dict2 = DoubleArrayTrie::from_terms_with_values(
vec![("fish", 3usize), ("bird", 4)].into_iter(),
);
let z1 = DoubleArrayTrieZipper::new_from_dict(&dict1);
let z2 = DoubleArrayTrieZipper::new_from_dict(&dict2);
let diff = z1.value_diff_with(z2);
let count = diff.iter_diffs().count();
assert_eq!(count, 0);
}
#[test]
fn test_value_diff_partial_overlap() {
let dict1 = DoubleArrayTrie::from_terms_with_values(
vec![("cat", 1usize), ("dog", 2), ("fish", 3)].into_iter(),
);
let dict2 = DoubleArrayTrie::from_terms_with_values(
vec![("dog", 20usize), ("bird", 4)].into_iter(),
);
let z1 = DoubleArrayTrieZipper::new_from_dict(&dict1);
let z2 = DoubleArrayTrieZipper::new_from_dict(&dict2);
let diff = z1.value_diff_with(z2);
let results: Vec<_> = diff.iter_diffs().collect();
assert_eq!(results.len(), 1);
assert_eq!(results[0].path, b"dog".to_vec());
assert_eq!(results[0].left_value, 2);
assert_eq!(results[0].right_value, 20);
}
#[test]
fn test_value_diff_descend() {
let dict1 = DoubleArrayTrie::from_terms_with_values(
vec![("score", 85u32), ("count", 100)].into_iter(),
);
let dict2 = DoubleArrayTrie::from_terms_with_values(
vec![("score", 92u32), ("count", 100)].into_iter(),
);
let z1 = DoubleArrayTrieZipper::new_from_dict(&dict1);
let z2 = DoubleArrayTrieZipper::new_from_dict(&dict2);
let diff = z1.value_diff_with(z2);
let score = diff
.descend(b's')
.and_then(|z| z.descend(b'c'))
.and_then(|z| z.descend(b'o'))
.and_then(|z| z.descend(b'r'))
.and_then(|z| z.descend(b'e'))
.unwrap();
assert!(score.is_final()); assert_eq!(score.left_value(), Some(85));
assert_eq!(score.right_value(), Some(92));
assert_eq!(score.values(), Some((85, 92)));
let count = diff
.descend(b'c')
.and_then(|z| z.descend(b'o'))
.and_then(|z| z.descend(b'u'))
.and_then(|z| z.descend(b'n'))
.and_then(|z| z.descend(b't'))
.unwrap();
assert!(!count.is_final()); assert!(count.both_final()); assert!(!count.values_differ()); }
#[test]
fn test_value_diff_children() {
let dict1 = DoubleArrayTrie::from_terms_with_values(
vec![("ab", 1usize), ("ac", 2), ("ad", 3)].into_iter(),
);
let dict2 = DoubleArrayTrie::from_terms_with_values(
vec![("ab", 10usize), ("ac", 2), ("ae", 5)].into_iter(),
);
let z1 = DoubleArrayTrieZipper::new_from_dict(&dict1);
let z2 = DoubleArrayTrieZipper::new_from_dict(&dict2);
let diff = z1.value_diff_with(z2);
let a = diff.descend(b'a').unwrap();
let mut children: Vec<u8> = a.children().map(|(label, _)| label).collect();
children.sort();
assert_eq!(children, vec![b'b', b'c']);
}
#[test]
fn test_value_diff_path_string() {
let dict1 = DoubleArrayTrie::from_terms_with_values(vec![("hello", 1usize)].into_iter());
let dict2 = DoubleArrayTrie::from_terms_with_values(vec![("hello", 2usize)].into_iter());
let z1 = DoubleArrayTrieZipper::new_from_dict(&dict1);
let z2 = DoubleArrayTrieZipper::new_from_dict(&dict2);
let diff = z1.value_diff_with(z2);
let results: Vec<_> = diff.iter_diffs().collect();
assert_eq!(results.len(), 1);
assert_eq!(results[0].path_string(), "hello");
}
#[test]
fn test_value_diff_extension_trait() {
let dict1 = DoubleArrayTrie::from_terms_with_values(vec![("key", 1usize)].into_iter());
let dict2 = DoubleArrayTrie::from_terms_with_values(vec![("key", 2usize)].into_iter());
let z1 = DoubleArrayTrieZipper::new_from_dict(&dict1);
let z2 = DoubleArrayTrieZipper::new_from_dict(&dict2);
let results: Vec<_> = z1.iter_value_diffs(z2).collect();
assert_eq!(results.len(), 1);
assert_eq!(results[0].left_value, 1);
assert_eq!(results[0].right_value, 2);
}
#[test]
fn test_value_diff_with_strings() {
let dict1 =
DoubleArrayTrie::from_terms_with_values(vec![("key", "old".to_string())].into_iter());
let dict2 =
DoubleArrayTrie::from_terms_with_values(vec![("key", "new".to_string())].into_iter());
let z1 = DoubleArrayTrieZipper::new_from_dict(&dict1);
let z2 = DoubleArrayTrieZipper::new_from_dict(&dict2);
let diff = z1.value_diff_with(z2);
let results: Vec<_> = diff.iter_diffs().collect();
assert_eq!(results.len(), 1);
assert_eq!(results[0].left_value, "old".to_string());
assert_eq!(results[0].right_value, "new".to_string());
}
#[test]
fn test_value_diff_nested_prefixes() {
let dict1 = DoubleArrayTrie::from_terms_with_values(
vec![("app", 1usize), ("apple", 2), ("application", 3)].into_iter(),
);
let dict2 = DoubleArrayTrie::from_terms_with_values(
vec![("app", 1usize), ("apple", 20), ("application", 3)].into_iter(),
);
let z1 = DoubleArrayTrieZipper::new_from_dict(&dict1);
let z2 = DoubleArrayTrieZipper::new_from_dict(&dict2);
let diff = z1.value_diff_with(z2);
let results: Vec<_> = diff.iter_diffs().collect();
assert_eq!(results.len(), 1);
assert_eq!(results[0].path, b"apple".to_vec());
assert_eq!(results[0].left_value, 2);
assert_eq!(results[0].right_value, 20);
}
#[test]
fn test_both_active_and_final() {
let dict1 = DoubleArrayTrie::from_terms_with_values(vec![("cat", 1usize)].into_iter());
let dict2 = DoubleArrayTrie::from_terms_with_values(vec![("cat", 2usize)].into_iter());
let z1 = DoubleArrayTrieZipper::new_from_dict(&dict1);
let z2 = DoubleArrayTrieZipper::new_from_dict(&dict2);
let diff = z1.value_diff_with(z2);
assert!(diff.both_active());
let cat = diff
.descend(b'c')
.and_then(|z| z.descend(b'a'))
.and_then(|z| z.descend(b't'))
.unwrap();
assert!(cat.both_active());
assert!(cat.both_final());
assert!(cat.values_differ());
}
}