use crate::node::{NodeKind, NormalizedNode};
#[must_use]
pub fn similarity_score(a: &NormalizedNode, b: &NormalizedNode) -> f64 {
let nodes_a = crate::node::count_nodes(a);
let nodes_b = crate::node::count_nodes(b);
if nodes_a == 0 && nodes_b == 0 {
return 1.0;
}
let matching = count_matching(a, b);
(2.0 * matching as f64) / (nodes_a + nodes_b) as f64
}
fn count_matching(a: &NormalizedNode, b: &NormalizedNode) -> usize {
if a.is_none() || b.is_none() {
return 0;
}
if std::mem::discriminant(&a.kind) != std::mem::discriminant(&b.kind) {
return 0;
}
if let (NodeKind::MacroCall { name: na }, NodeKind::MacroCall { name: nb }) = (&a.kind, &b.kind)
&& na != nb
{
return 0;
}
let self_match = usize::from(a.kind == b.kind);
self_match
+ a.children
.iter()
.zip(&b.children)
.map(|(ca, cb)| count_matching(ca, cb))
.sum::<usize>()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::node::{BinOpKind, LiteralKind, PlaceholderKind};
fn var(idx: usize) -> NormalizedNode {
NormalizedNode::leaf(NodeKind::Placeholder(PlaceholderKind::Variable, idx))
}
fn int_lit() -> NormalizedNode {
NormalizedNode::leaf(NodeKind::Literal(LiteralKind::Int))
}
#[test]
fn identical_leaves_score_one() {
let a = int_lit();
let b = int_lit();
assert!((similarity_score(&a, &b) - 1.0).abs() < f64::EPSILON);
}
#[test]
fn completely_different_kinds_score_zero() {
let a = int_lit();
let b = NormalizedNode::leaf(NodeKind::PatWild);
assert!(similarity_score(&a, &b) < f64::EPSILON);
}
#[test]
fn both_none_sentinels_score_one() {
let a = NormalizedNode::none();
let b = NormalizedNode::none();
assert!((similarity_score(&a, &b) - 1.0).abs() < f64::EPSILON);
}
#[test]
fn none_vs_real_node_score_zero() {
let a = NormalizedNode::none();
let b = int_lit();
assert!(similarity_score(&a, &b) < f64::EPSILON);
}
#[test]
fn different_child_counts_uses_shared_prefix() {
let a =
NormalizedNode::with_children(NodeKind::Block, vec![int_lit(), int_lit(), int_lit()]);
let b = NormalizedNode::with_children(
NodeKind::Block,
vec![int_lit(), int_lit(), int_lit(), var(0), var(1)],
);
let score = similarity_score(&a, &b);
assert!((score - 0.8).abs() < f64::EPSILON);
}
#[test]
fn if_with_else_vs_if_without_else() {
let with_else = NormalizedNode::with_children(
NodeKind::If,
vec![
var(0),
NormalizedNode::with_children(NodeKind::Block, vec![int_lit()]),
NormalizedNode::with_children(NodeKind::Block, vec![int_lit()]),
],
);
let without_else = NormalizedNode::with_children(
NodeKind::If,
vec![
var(0),
NormalizedNode::with_children(NodeKind::Block, vec![int_lit()]),
NormalizedNode::none(),
],
);
let score = similarity_score(&with_else, &without_else);
assert!((score - 0.8).abs() < f64::EPSILON);
}
#[test]
fn return_with_vs_without_value() {
let with_val = NormalizedNode::with_children(NodeKind::Return, vec![int_lit()]);
let without_val = NormalizedNode::with_children(NodeKind::Return, vec![]);
let score = similarity_score(&with_val, &without_val);
assert!((score - 2.0 / 3.0).abs() < f64::EPSILON);
}
#[test]
fn same_discriminant_different_data_no_self_match() {
let a = NormalizedNode::with_children(
NodeKind::BinaryOp(BinOpKind::Add),
vec![var(0), int_lit()],
);
let b = NormalizedNode::with_children(
NodeKind::BinaryOp(BinOpKind::Sub),
vec![var(0), int_lit()],
);
let score = similarity_score(&a, &b);
assert!((score - 2.0 / 3.0).abs() < f64::EPSILON);
}
#[test]
fn macro_call_different_names_score_zero() {
let a = NormalizedNode::with_children(
NodeKind::MacroCall {
name: "println".to_string(),
},
vec![int_lit()],
);
let b = NormalizedNode::with_children(
NodeKind::MacroCall {
name: "eprintln".to_string(),
},
vec![int_lit()],
);
assert!(similarity_score(&a, &b) < f64::EPSILON);
}
#[test]
fn macro_call_same_name_different_args_partial() {
let a = NormalizedNode::with_children(
NodeKind::MacroCall {
name: "println".to_string(),
},
vec![int_lit()],
);
let b = NormalizedNode::with_children(
NodeKind::MacroCall {
name: "println".to_string(),
},
vec![int_lit(), var(0)],
);
let score = similarity_score(&a, &b);
assert!((score - 0.8).abs() < f64::EPSILON);
}
#[test]
fn similarity_is_symmetric() {
let a = NormalizedNode::with_children(NodeKind::Block, vec![int_lit(), var(0)]);
let b = NormalizedNode::with_children(NodeKind::Block, vec![int_lit(), var(0), var(1)]);
assert!((similarity_score(&a, &b) - similarity_score(&b, &a)).abs() < f64::EPSILON);
}
}