use ahash::HashSet;
use xot::{Attributes, Element};
const IDENTICAL_VALUE_WEIGHT: f32 = 4.0;
pub(crate) fn element_similarity(element_a: &Element, element_b: &Element) -> Option<f32> {
if element_a.name() != element_b.name() {
return None;
}
let attributes_a = element_a.attributes();
let attributes_b = element_b.attributes();
Some(attributes_similarity(attributes_a, attributes_b))
}
fn attributes_similarity(attributes_a: &Attributes, attributes_b: &Attributes) -> f32 {
if attributes_a.is_empty() && attributes_b.is_empty() {
return 1.0;
}
let keys_a = HashSet::from_iter(attributes_a.keys());
let keys_b = HashSet::from_iter(attributes_b.keys());
let identical_keys = keys_a.intersection(&keys_b).count() as f32;
let mut identical_values: f32 = 0.0;
for key in keys_a.intersection(&keys_b) {
let value_a = attributes_a.get(*key).unwrap();
let value_b = attributes_b.get(*key).unwrap();
if value_a == value_b {
identical_values += 1.0;
}
}
let max_attributes = std::cmp::max(attributes_a.len(), attributes_b.len()) as f32;
(identical_keys + identical_values * IDENTICAL_VALUE_WEIGHT + 4.0)
/ (max_attributes + max_attributes * IDENTICAL_VALUE_WEIGHT + 4.0)
}
#[cfg(test)]
mod tests {
use super::*;
use xot::Xot;
#[test]
fn test_both_empty() {
let attributes_a = Attributes::new();
let attributes_b = Attributes::new();
assert_eq!(attributes_similarity(&attributes_a, &attributes_b), 1.0);
}
#[test]
fn test_one_empty() {
let mut xot = Xot::new();
let name = xot.add_name("a");
let mut attributes_a = Attributes::new();
attributes_a.insert(name, "b".to_string());
let attributes_b = Attributes::new();
assert_eq!(
attributes_similarity(&attributes_a, &attributes_b),
0.44444445
);
}
#[test]
fn test_same_key_different_value() {
let mut xot = Xot::new();
let name = xot.add_name("a");
let mut attributes_a = Attributes::new();
attributes_a.insert(name, "b".to_string());
let mut attributes_b = Attributes::new();
attributes_b.insert(name, "c".to_string());
assert_eq!(
attributes_similarity(&attributes_a, &attributes_b),
0.5555556
);
}
#[test]
fn test_same_key_same_value() {
let mut xot = Xot::new();
let name = xot.add_name("a");
let mut attributes_a = Attributes::new();
attributes_a.insert(name, "b".to_string());
let mut attributes_b = Attributes::new();
attributes_b.insert(name, "b".to_string());
assert_eq!(attributes_similarity(&attributes_a, &attributes_b), 1.0);
}
#[test]
fn test_2_same_keys_different_values() {
let mut xot = Xot::new();
let name_a = xot.add_name("a");
let name_b = xot.add_name("b");
let mut attributes_a = Attributes::new();
attributes_a.insert(name_a, "A".to_string());
attributes_a.insert(name_b, "B".to_string());
let mut attributes_b = Attributes::new();
attributes_b.insert(name_a, "D".to_string());
attributes_b.insert(name_b, "E".to_string());
assert_eq!(
attributes_similarity(&attributes_a, &attributes_b,),
0.42857143
);
}
#[test]
fn test_2_same_keys_same_values() {
let mut xot = Xot::new();
let name_a = xot.add_name("a");
let name_b = xot.add_name("b");
let mut attributes_a = Attributes::new();
attributes_a.insert(name_a, "A".to_string());
attributes_a.insert(name_b, "B".to_string());
let mut attributes_b = Attributes::new();
attributes_b.insert(name_a, "A".to_string());
attributes_b.insert(name_b, "B".to_string());
assert_eq!(attributes_similarity(&attributes_a, &attributes_b,), 1.0);
}
#[test]
fn test_2_same_keys_half_same_values() {
let mut xot = Xot::new();
let name_a = xot.add_name("a");
let name_b = xot.add_name("b");
let mut attributes_a = Attributes::new();
attributes_a.insert(name_a, "A".to_string());
attributes_a.insert(name_b, "B".to_string());
let mut attributes_b = Attributes::new();
attributes_b.insert(name_b, "B".to_string());
assert_eq!(
attributes_similarity(&attributes_a, &attributes_b),
0.64285713
);
}
#[test]
fn test_2_half_different_keys_half_same_values() {
let mut xot = Xot::new();
let name_a = xot.add_name("a");
let name_b = xot.add_name("b");
let name_c = xot.add_name("c");
let mut attributes_a = Attributes::new();
attributes_a.insert(name_a, "A".to_string());
attributes_a.insert(name_b, "B".to_string());
let mut attributes_b = Attributes::new();
attributes_b.insert(name_c, "C".to_string());
attributes_b.insert(name_b, "B".to_string());
assert_eq!(
attributes_similarity(&attributes_a, &attributes_b),
0.64285713
);
}
#[test]
fn test_3_same_keys_different_values() {
let mut xot = Xot::new();
let name_a = xot.add_name("a");
let name_b = xot.add_name("b");
let name_c = xot.add_name("c");
let mut attributes_a = Attributes::new();
attributes_a.insert(name_a, "A".to_string());
attributes_a.insert(name_b, "B".to_string());
attributes_a.insert(name_c, "C".to_string());
let mut attributes_b = Attributes::new();
attributes_b.insert(name_a, "D".to_string());
attributes_b.insert(name_b, "E".to_string());
attributes_b.insert(name_c, "F".to_string());
assert_eq!(
attributes_similarity(&attributes_a, &attributes_b,),
0.36842105
);
}
#[test]
fn test_3_same_keys_2_different_values() {
let mut xot = Xot::new();
let name_a = xot.add_name("a");
let name_b = xot.add_name("b");
let name_c = xot.add_name("c");
let mut attributes_a = Attributes::new();
attributes_a.insert(name_a, "A".to_string());
attributes_a.insert(name_b, "B".to_string());
attributes_a.insert(name_c, "C".to_string());
let mut attributes_b = Attributes::new();
attributes_b.insert(name_a, "A".to_string());
attributes_b.insert(name_b, "E".to_string());
attributes_b.insert(name_c, "F".to_string());
assert_eq!(
attributes_similarity(&attributes_a, &attributes_b,),
0.57894737
);
}
#[test]
fn test_3_same_keys_1_different_values() {
let mut xot = Xot::new();
let name_a = xot.add_name("a");
let name_b = xot.add_name("b");
let name_c = xot.add_name("c");
let mut attributes_a = Attributes::new();
attributes_a.insert(name_a, "A".to_string());
attributes_a.insert(name_b, "B".to_string());
attributes_a.insert(name_c, "C".to_string());
let mut attributes_b = Attributes::new();
attributes_b.insert(name_a, "A".to_string());
attributes_b.insert(name_b, "B".to_string());
attributes_b.insert(name_c, "F".to_string());
assert_eq!(
attributes_similarity(&attributes_a, &attributes_b,),
0.7894737
);
}
#[test]
fn test_3_same_keys_same_values() {
let mut xot = Xot::new();
let name_a = xot.add_name("a");
let name_b = xot.add_name("b");
let name_c = xot.add_name("c");
let mut attributes_a = Attributes::new();
attributes_a.insert(name_a, "A".to_string());
attributes_a.insert(name_b, "B".to_string());
attributes_a.insert(name_c, "C".to_string());
let mut attributes_b = Attributes::new();
attributes_b.insert(name_a, "A".to_string());
attributes_b.insert(name_b, "B".to_string());
attributes_b.insert(name_c, "C".to_string());
assert_eq!(attributes_similarity(&attributes_a, &attributes_b,), 1.0);
}
}