use seahash::SeaHasher;
use std::hash::Hasher;
use crate::algebra::Algebra;
/// Computes a 64-bit fingerprint of `plan` that does not depend on how its
/// variables are named.
///
/// The plan is rendered through its `Debug` implementation, the variable names
/// in that rendering are rewritten by [`normalise_variable_names`], and the
/// resulting text is fed to a [`SeaHasher`]. Because the fingerprint is derived
/// from the `Debug` text, any change to that rendering changes the value.
///
/// # Returns
///
/// The hasher's final `u64` state for the normalised rendering.
pub fn compute_fingerprint(plan: &Algebra) -> u64 {
    let canonical = normalise_variable_names(&format!("{plan:?}"));

    let mut state = SeaHasher::new();
    state.write(canonical.as_bytes());
    state.finish()
}
/// Rewrites every variable name in a `Debug` rendering to a positional
/// placeholder.
///
/// Each occurrence of `Variable { name: "<name>"` has `<name>` replaced by
/// `_v<N>`, where `N` is the zero-based order in which that distinct name was
/// first seen. Repeated names therefore map to the same placeholder, and two
/// renderings that differ only by a consistent renaming of variables produce
/// the same output.
///
/// All other text is copied through unchanged, including non-ASCII characters.
/// The scan never slices `raw` inside a multi-byte UTF-8 sequence, so arbitrary
/// Unicode in the input cannot cause a panic.
///
/// A backslash inside a name escapes the following byte, so an escaped quote
/// (`\"`) does not terminate the name. If a name is never terminated, the
/// output ends right after the opening `Variable { name: "` marker and the
/// unterminated remainder is discarded.
fn normalise_variable_names(raw: &str) -> String {
    use std::collections::HashMap;
    use std::fmt::Write as _;

    const NEEDLE: &str = "Variable { name: \"";

    let mut out = String::with_capacity(raw.len());
    // Keys borrow straight from `raw`; the value is the placeholder index.
    let mut mapping: HashMap<&str, usize> = HashMap::new();
    let mut rest = raw;

    while let Some(pos) = rest.find(NEEDLE) {
        // Copy everything up to and including the marker verbatim. `find`
        // returns a char-boundary offset and NEEDLE is ASCII, so this slice
        // is always valid.
        let name_start = pos + NEEDLE.len();
        out.push_str(&rest[..name_start]);
        let tail = &rest[name_start..];

        // Locate the first unescaped closing quote. Scanning bytes is safe:
        // `"` and `\` are ASCII and never occur inside a multi-byte UTF-8
        // sequence, so the resulting offset is a char boundary.
        let mut escaped = false;
        let closing = tail.bytes().position(|b| {
            if escaped {
                escaped = false;
                false
            } else if b == b'\\' {
                escaped = true;
                false
            } else {
                b == b'"'
            }
        });

        match closing {
            Some(end) => {
                let next_idx = mapping.len();
                let idx = *mapping.entry(&tail[..end]).or_insert(next_idx);
                write!(out, "_v{idx}\"").expect("writing to a String cannot fail");
                rest = &tail[end + 1..];
            }
            // Unterminated name: nothing sensible to rename, stop here.
            None => return out,
        }
    }

    out.push_str(rest);
    out
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::algebra::{Term, TriplePattern, Variable};
    use oxirs_core::model::NamedNode;

    /// The fixed predicate IRI used by every pattern built in these tests.
    fn pred() -> Term {
        let iri = NamedNode::new_unchecked("http://example.org/p");
        Term::Iri(iri)
    }

    /// Builds a variable term; panics if `name` is rejected by `Variable::new`.
    fn var(name: &str) -> Term {
        let variable = Variable::new(name).expect("valid var");
        Term::Variable(variable)
    }

    /// Builds one triple pattern `s <p> o` using the shared predicate.
    fn triple(s: Term, o: Term) -> TriplePattern {
        TriplePattern {
            subject: s,
            predicate: pred(),
            object: o,
        }
    }

    /// Builds a basic graph pattern holding the single triple `s <p> o`.
    fn bgp(s: Term, o: Term) -> Algebra {
        Algebra::Bgp(vec![triple(s, o)])
    }

    /// Plans that differ only in variable names share a fingerprint.
    #[test]
    fn identical_variables_same_fingerprint() {
        let with_xy = compute_fingerprint(&bgp(var("x"), var("y")));
        let with_ab = compute_fingerprint(&bgp(var("a"), var("b")));
        assert_eq!(with_xy, with_ab);
    }

    /// A one-triple plan and a two-triple plan must not collide.
    #[test]
    fn structurally_different_plans_different_fingerprint() {
        let single = bgp(var("x"), var("y"));
        let chained = Algebra::Bgp(vec![
            triple(var("x"), var("y")),
            triple(var("y"), var("z")),
        ]);
        assert_ne!(compute_fingerprint(&single), compute_fingerprint(&chained));
    }

    /// Fingerprinting the same plan twice yields the same value.
    #[test]
    fn same_plan_same_fingerprint_deterministic() {
        let plan = bgp(var("x"), var("y"));
        let first = compute_fingerprint(&plan);
        let second = compute_fingerprint(&plan);
        assert_eq!(first, second);
    }

    /// Renaming is applied consistently across both sides of a join.
    #[test]
    fn join_plans_correct_normalisation() {
        let join_abcd = Algebra::Join {
            left: Box::new(bgp(var("a"), var("b"))),
            right: Box::new(bgp(var("c"), var("d"))),
        };
        let join_xyzw = Algebra::Join {
            left: Box::new(bgp(var("x"), var("y"))),
            right: Box::new(bgp(var("z"), var("w"))),
        };
        assert_eq!(
            compute_fingerprint(&join_abcd),
            compute_fingerprint(&join_xyzw)
        );
    }

    /// An empty basic graph pattern still fingerprints deterministically.
    #[test]
    fn empty_bgp_fingerprint_stable() {
        let empty = Algebra::Bgp(vec![]);
        let expected = compute_fingerprint(&empty);
        assert_eq!(expected, compute_fingerprint(&empty));
    }

    /// Guards the `Debug` shape that the normaliser searches for: the
    /// rendering of a variable must contain `name: "<name>"`.
    #[test]
    fn debug_output_contains_name_token() {
        let variable = Variable::new("myvar").expect("valid var");
        let dbg = format!("{variable:?}");
        assert!(
            dbg.contains("name: \"myvar\""),
            "Expected 'name: \"myvar\"' in Variable debug output, got: {dbg}"
        );
    }
}