use std::collections::HashSet;
use crate::zipper::{DictZipper, ValuedDictZipper};
#[derive(Clone, Debug)]
pub struct SymmetricDifferenceZipper<Z: DictZipper> {
zippers: Vec<Option<Z>>,
path: Vec<Z::Unit>,
}
impl<Z: DictZipper> SymmetricDifferenceZipper<Z> {
pub fn new(zippers: Vec<Z>) -> Self {
Self {
zippers: zippers.into_iter().map(Some).collect(),
path: Vec::new(),
}
}
pub fn dictionary_count(&self) -> usize {
self.zippers.len()
}
pub fn active_dictionary_count(&self) -> usize {
self.zippers.iter().filter(|z| z.is_some()).count()
}
pub fn final_count(&self) -> usize {
self.zippers
.iter()
.filter_map(|z| z.as_ref())
.filter(|z| z.is_final())
.count()
}
pub fn unique_source_index(&self) -> Option<usize> {
if self.final_count() != 1 {
return None;
}
self.zippers
.iter()
.enumerate()
.find(|(_, z)| z.as_ref().is_some_and(|z| z.is_final()))
.map(|(i, _)| i)
}
pub fn iter(&self) -> SymmetricDifferenceIterator<Z> {
SymmetricDifferenceIterator::new(self.clone())
}
}
impl<Z: DictZipper> DictZipper for SymmetricDifferenceZipper<Z> {
type Unit = Z::Unit;
fn is_final(&self) -> bool {
self.final_count() == 1
}
fn descend(&self, label: Self::Unit) -> Option<Self> {
let new_zippers: Vec<Option<Z>> = self
.zippers
.iter()
.map(|z| z.as_ref().and_then(|z| z.descend(label)))
.collect();
if new_zippers.iter().any(|z| z.is_some()) {
let mut new_path = self.path.clone();
new_path.push(label);
Some(Self {
zippers: new_zippers,
path: new_path,
})
} else {
None
}
}
fn children(&self) -> impl Iterator<Item = (Self::Unit, Self)> {
let mut labels: Vec<Z::Unit> = self
.zippers
.iter()
.filter_map(|z| z.as_ref())
.flat_map(|z| z.children().map(|(label, _)| label))
.collect();
labels.sort_by(|a, b| format!("{:?}", a).cmp(&format!("{:?}", b)));
labels.dedup();
let self_clone = self.clone();
labels
.into_iter()
.filter_map(move |label| self_clone.descend(label).map(|child| (label, child)))
}
fn path(&self) -> Vec<Self::Unit> {
self.path.clone()
}
}
impl<Z: ValuedDictZipper> ValuedDictZipper for SymmetricDifferenceZipper<Z> {
type Value = Z::Value;
fn value(&self) -> Option<Self::Value> {
if !self.is_final() {
return None;
}
self.zippers
.iter()
.filter_map(|z| z.as_ref())
.find(|z| z.is_final())
.and_then(|z| z.value())
}
}
pub struct SymmetricDifferenceIterator<Z: DictZipper> {
stack: Vec<SymmetricDifferenceZipper<Z>>,
seen: HashSet<Vec<Z::Unit>>,
}
impl<Z: DictZipper> SymmetricDifferenceIterator<Z> {
fn new(zipper: SymmetricDifferenceZipper<Z>) -> Self {
let mut stack = Vec::with_capacity(16);
stack.push(zipper);
Self {
stack,
seen: HashSet::new(),
}
}
}
impl<Z: DictZipper> Iterator for SymmetricDifferenceIterator<Z> {
type Item = (Vec<Z::Unit>, SymmetricDifferenceZipper<Z>);
fn next(&mut self) -> Option<Self::Item> {
while let Some(zipper) = self.stack.pop() {
for (_label, child) in zipper.children() {
self.stack.push(child);
}
if zipper.is_final() {
let path = zipper.path();
if self.seen.insert(path.clone()) {
return Some((path, zipper));
}
}
}
None
}
}
pub struct ValuedSymmetricDifferenceIterator<Z: ValuedDictZipper> {
inner: SymmetricDifferenceIterator<Z>,
}
impl<Z: ValuedDictZipper> ValuedSymmetricDifferenceIterator<Z> {
pub fn new(zipper: SymmetricDifferenceZipper<Z>) -> Self {
Self {
inner: SymmetricDifferenceIterator::new(zipper),
}
}
}
impl<Z: ValuedDictZipper> Iterator for ValuedSymmetricDifferenceIterator<Z> {
type Item = (Vec<Z::Unit>, Z::Value);
fn next(&mut self) -> Option<Self::Item> {
loop {
let (path, zipper) = self.inner.next()?;
if let Some(value) = zipper.value() {
return Some((path, value));
}
}
}
}
pub trait SymmetricDifferenceZipperExt: DictZipper + Sized {
fn symmetric_difference_with(self, other: Self) -> SymmetricDifferenceZipper<Self> {
SymmetricDifferenceZipper::new(vec![self, other])
}
fn symmetric_difference_all(
self,
others: impl IntoIterator<Item = Self>,
) -> SymmetricDifferenceZipper<Self> {
let mut zippers = vec![self];
zippers.extend(others);
SymmetricDifferenceZipper::new(zippers)
}
}
impl<Z: DictZipper> SymmetricDifferenceZipperExt for Z {}
#[cfg(test)]
mod tests {
use super::*;
use crate::double_array_trie::DoubleArrayTrie;
use crate::double_array_trie_zipper::DoubleArrayTrieZipper;
fn sorted_strings(mut v: Vec<String>) -> Vec<String> {
v.sort();
v
}
#[test]
fn test_symmetric_difference_basic() {
let dict_a = DoubleArrayTrie::from_terms(vec!["cat", "dog", "fish"].iter());
let dict_b = DoubleArrayTrie::from_terms(vec!["dog", "fish", "bird"].iter());
let z_a = DoubleArrayTrieZipper::new_from_dict(&dict_a);
let z_b = DoubleArrayTrieZipper::new_from_dict(&dict_b);
let sym_diff = SymmetricDifferenceZipper::new(vec![z_a, z_b]);
let results: Vec<String> = sorted_strings(
sym_diff
.iter()
.map(|(path, _)| String::from_utf8(path).unwrap())
.collect(),
);
assert_eq!(results, vec!["bird", "cat"]);
}
#[test]
fn test_symmetric_difference_identical() {
let dict_a = DoubleArrayTrie::from_terms(vec!["cat", "dog"].iter());
let dict_b = DoubleArrayTrie::from_terms(vec!["cat", "dog"].iter());
let z_a = DoubleArrayTrieZipper::new_from_dict(&dict_a);
let z_b = DoubleArrayTrieZipper::new_from_dict(&dict_b);
let sym_diff = z_a.symmetric_difference_with(z_b);
let count = sym_diff.iter().count();
assert_eq!(count, 0);
}
#[test]
fn test_symmetric_difference_disjoint() {
let dict_a = DoubleArrayTrie::from_terms(vec!["cat", "dog"].iter());
let dict_b = DoubleArrayTrie::from_terms(vec!["fish", "bird"].iter());
let z_a = DoubleArrayTrieZipper::new_from_dict(&dict_a);
let z_b = DoubleArrayTrieZipper::new_from_dict(&dict_b);
let sym_diff = z_a.symmetric_difference_with(z_b);
let results: Vec<String> = sorted_strings(
sym_diff
.iter()
.map(|(path, _)| String::from_utf8(path).unwrap())
.collect(),
);
assert_eq!(results, vec!["bird", "cat", "dog", "fish"]);
}
#[test]
fn test_symmetric_difference_with_empty() {
let dict_a = DoubleArrayTrie::from_terms(vec!["cat", "dog"].iter());
let dict_b: DoubleArrayTrie = DoubleArrayTrie::new();
let z_a = DoubleArrayTrieZipper::new_from_dict(&dict_a);
let z_b = DoubleArrayTrieZipper::new_from_dict(&dict_b);
let sym_diff = z_a.symmetric_difference_with(z_b);
let results: Vec<String> = sorted_strings(
sym_diff
.iter()
.map(|(path, _)| String::from_utf8(path).unwrap())
.collect(),
);
assert_eq!(results, vec!["cat", "dog"]);
}
#[test]
fn test_symmetric_difference_three_dicts() {
let dict1 = DoubleArrayTrie::from_terms(vec!["a", "b", "c"].iter());
let dict2 = DoubleArrayTrie::from_terms(vec!["b", "c", "d"].iter());
let dict3 = DoubleArrayTrie::from_terms(vec!["c", "d", "e"].iter());
let z1 = DoubleArrayTrieZipper::new_from_dict(&dict1);
let z2 = DoubleArrayTrieZipper::new_from_dict(&dict2);
let z3 = DoubleArrayTrieZipper::new_from_dict(&dict3);
let sym_diff = z1.symmetric_difference_all(vec![z2, z3]);
let results: Vec<String> = sorted_strings(
sym_diff
.iter()
.map(|(path, _)| String::from_utf8(path).unwrap())
.collect(),
);
assert_eq!(results, vec!["a", "e"]);
}
#[test]
fn test_symmetric_difference_descend() {
let dict_a = DoubleArrayTrie::from_terms(vec!["cat", "car"].iter());
let dict_b = DoubleArrayTrie::from_terms(vec!["cat", "cab"].iter());
let z_a = DoubleArrayTrieZipper::new_from_dict(&dict_a);
let z_b = DoubleArrayTrieZipper::new_from_dict(&dict_b);
let sym_diff = z_a.symmetric_difference_with(z_b);
let cat = sym_diff
.descend(b'c')
.and_then(|z| z.descend(b'a'))
.and_then(|z| z.descend(b't'));
assert!(cat.is_some());
assert!(!cat.unwrap().is_final());
let car = sym_diff
.descend(b'c')
.and_then(|z| z.descend(b'a'))
.and_then(|z| z.descend(b'r'));
assert!(car.is_some());
assert!(car.unwrap().is_final());
let cab = sym_diff
.descend(b'c')
.and_then(|z| z.descend(b'a'))
.and_then(|z| z.descend(b'b'));
assert!(cab.is_some());
assert!(cab.unwrap().is_final()); }
#[test]
fn test_symmetric_difference_children() {
let dict_a = DoubleArrayTrie::from_terms(vec!["ab", "ac"].iter());
let dict_b = DoubleArrayTrie::from_terms(vec!["ac", "ad"].iter());
let z_a = DoubleArrayTrieZipper::new_from_dict(&dict_a);
let z_b = DoubleArrayTrieZipper::new_from_dict(&dict_b);
let sym_diff = z_a.symmetric_difference_with(z_b);
let a = sym_diff.descend(b'a').unwrap();
let mut children: Vec<u8> = a.children().map(|(label, _)| label).collect();
children.sort();
assert_eq!(children, vec![b'b', b'c', b'd']);
}
#[test]
fn test_symmetric_difference_with_values() {
let dict_a =
DoubleArrayTrie::from_terms_with_values(vec![("cat", 1usize), ("dog", 2)].into_iter());
let dict_b = DoubleArrayTrie::from_terms_with_values(
vec![("dog", 20usize), ("fish", 3)].into_iter(),
);
let z_a = DoubleArrayTrieZipper::new_from_dict(&dict_a);
let z_b = DoubleArrayTrieZipper::new_from_dict(&dict_b);
let sym_diff = z_a.symmetric_difference_with(z_b);
let cat = sym_diff
.descend(b'c')
.and_then(|z| z.descend(b'a'))
.and_then(|z| z.descend(b't'))
.unwrap();
assert!(cat.is_final());
assert_eq!(cat.value(), Some(1));
let fish = sym_diff
.descend(b'f')
.and_then(|z| z.descend(b'i'))
.and_then(|z| z.descend(b's'))
.and_then(|z| z.descend(b'h'))
.unwrap();
assert!(fish.is_final());
assert_eq!(fish.value(), Some(3));
let dog = sym_diff
.descend(b'd')
.and_then(|z| z.descend(b'o'))
.and_then(|z| z.descend(b'g'))
.unwrap();
assert!(!dog.is_final());
assert_eq!(dog.value(), None);
}
#[test]
fn test_symmetric_difference_valued_iterator() {
let dict_a =
DoubleArrayTrie::from_terms_with_values(vec![("cat", 1usize), ("dog", 2)].into_iter());
let dict_b = DoubleArrayTrie::from_terms_with_values(
vec![("dog", 20usize), ("fish", 3)].into_iter(),
);
let z_a = DoubleArrayTrieZipper::new_from_dict(&dict_a);
let z_b = DoubleArrayTrieZipper::new_from_dict(&dict_b);
let sym_diff = z_a.symmetric_difference_with(z_b);
let valued_iter = ValuedSymmetricDifferenceIterator::new(sym_diff);
let mut results: Vec<(String, usize)> = valued_iter
.map(|(path, val)| (String::from_utf8(path).unwrap(), val))
.collect();
results.sort_by(|a, b| a.0.cmp(&b.0));
assert_eq!(
results,
vec![("cat".to_string(), 1), ("fish".to_string(), 3),]
);
}
#[test]
fn test_unique_source_index() {
let dict_a = DoubleArrayTrie::from_terms(vec!["cat", "dog"].iter());
let dict_b = DoubleArrayTrie::from_terms(vec!["dog", "fish"].iter());
let z_a = DoubleArrayTrieZipper::new_from_dict(&dict_a);
let z_b = DoubleArrayTrieZipper::new_from_dict(&dict_b);
let sym_diff = z_a.symmetric_difference_with(z_b);
let cat = sym_diff
.descend(b'c')
.and_then(|z| z.descend(b'a'))
.and_then(|z| z.descend(b't'))
.unwrap();
assert_eq!(cat.unique_source_index(), Some(0));
let fish = sym_diff
.descend(b'f')
.and_then(|z| z.descend(b'i'))
.and_then(|z| z.descend(b's'))
.and_then(|z| z.descend(b'h'))
.unwrap();
assert_eq!(fish.unique_source_index(), Some(1));
let dog = sym_diff
.descend(b'd')
.and_then(|z| z.descend(b'o'))
.and_then(|z| z.descend(b'g'))
.unwrap();
assert_eq!(dog.unique_source_index(), None);
}
#[test]
fn test_dictionary_count() {
let dict1 = DoubleArrayTrie::from_terms(vec!["a"].iter());
let dict2 = DoubleArrayTrie::from_terms(vec!["b"].iter());
let dict3 = DoubleArrayTrie::from_terms(vec!["c"].iter());
let z1 = DoubleArrayTrieZipper::new_from_dict(&dict1);
let z2 = DoubleArrayTrieZipper::new_from_dict(&dict2);
let z3 = DoubleArrayTrieZipper::new_from_dict(&dict3);
let sym_diff = z1.symmetric_difference_all(vec![z2, z3]);
assert_eq!(sym_diff.dictionary_count(), 3);
assert_eq!(sym_diff.active_dictionary_count(), 3);
}
#[test]
fn test_property_symmetric_difference_equals_union_minus_intersection() {
let dict_a = DoubleArrayTrie::from_terms(vec!["cat", "dog", "fish"].iter());
let dict_b = DoubleArrayTrie::from_terms(vec!["dog", "fish", "bird"].iter());
let z_a = DoubleArrayTrieZipper::new_from_dict(&dict_a);
let z_b = DoubleArrayTrieZipper::new_from_dict(&dict_b);
let sym_diff = z_a.symmetric_difference_with(z_b);
let sym_diff_terms: HashSet<String> = sym_diff
.iter()
.map(|(path, _)| String::from_utf8(path).unwrap())
.collect();
let a_terms: HashSet<String> = ["cat", "dog", "fish"]
.iter()
.map(|s| s.to_string())
.collect();
let b_terms: HashSet<String> = ["dog", "fish", "bird"]
.iter()
.map(|s| s.to_string())
.collect();
let union: HashSet<String> = a_terms.union(&b_terms).cloned().collect();
let intersection: HashSet<String> = a_terms.intersection(&b_terms).cloned().collect();
let expected: HashSet<String> = union.difference(&intersection).cloned().collect();
assert_eq!(sym_diff_terms, expected);
}
#[test]
fn test_property_symmetric_difference_equals_difference_union() {
let dict_a = DoubleArrayTrie::from_terms(vec!["cat", "dog", "fish"].iter());
let dict_b = DoubleArrayTrie::from_terms(vec!["dog", "fish", "bird"].iter());
let z_a1 = DoubleArrayTrieZipper::new_from_dict(&dict_a);
let z_b1 = DoubleArrayTrieZipper::new_from_dict(&dict_b);
let z_a2 = DoubleArrayTrieZipper::new_from_dict(&dict_a);
let z_b2 = DoubleArrayTrieZipper::new_from_dict(&dict_b);
let sym_diff = z_a1.symmetric_difference_with(z_b1);
let sym_diff_terms: HashSet<String> = sym_diff
.iter()
.map(|(path, _)| String::from_utf8(path).unwrap())
.collect();
use crate::difference_zipper::DifferenceZipperExt;
let a_minus_b: HashSet<String> = z_a2
.clone()
.difference_from(z_b2.clone())
.iter()
.map(|(path, _)| String::from_utf8(path).unwrap())
.collect();
let b_minus_a: HashSet<String> = z_b2
.difference_from(z_a2)
.iter()
.map(|(path, _)| String::from_utf8(path).unwrap())
.collect();
let expected: HashSet<String> = a_minus_b.union(&b_minus_a).cloned().collect();
assert_eq!(sym_diff_terms, expected);
}
}