use crate::core::{encode, Code};
/// Weight applied to the spectral bit-difference term in `token_distance`;
/// the bloom-overlap term is weighted by `1.0 - ALPHA`.
const ALPHA: f32 = 0.65;
/// Returns the smallest blended distance between any entry of `a` and any
/// entry of `b`.
///
/// Each code is walked as `(spectral, bloom)` pairs (the two sequences are
/// zipped, so extra elements in the longer one are ignored). A pair from `a`
/// is only compared with a pair from `b` when their buckets — the spectral
/// shifted right by 29 bits — differ by at most one.
///
/// For a compared pair the distance is
/// `ALPHA * h + (1.0 - ALPHA) * j`, where `h` is the number of differing
/// spectral bits divided by 32 and `j` is one minus the ratio of shared to
/// combined bloom bits (`0.0` when neither bloom has any bit set).
///
/// Returns `1.0` when no pair is compared or none scores below `1.0`.
#[inline]
#[must_use]
pub fn token_distance(a: &Code, b: &Code) -> f32 {
    let mut closest = 1.0f32;

    for (&spec_lhs, &bloom_lhs) in a.spectrals.iter().zip(a.blooms.iter()) {
        let bucket_lhs = spec_lhs >> 29;

        // Only entries in the same or an adjacent bucket are scored.
        let neighbours = b
            .spectrals
            .iter()
            .zip(b.blooms.iter())
            .filter(|&(&spec_rhs, _)| bucket_lhs.abs_diff(spec_rhs >> 29) <= 1);

        for (&spec_rhs, &bloom_rhs) in neighbours {
            let differing_bits = (spec_lhs ^ spec_rhs).count_ones();
            let shared_bits = (bloom_lhs & bloom_rhs).count_ones();
            let combined_bits = (bloom_lhs | bloom_rhs).count_ones();

            let h = differing_bits as f32 / 32.0;
            // Two empty blooms are treated as identical rather than dividing by zero.
            let j = if combined_bits == 0 {
                0.0
            } else {
                1.0 - shared_bits as f32 / combined_bits as f32
            };

            closest = closest.min(ALPHA * h + (1.0 - ALPHA) * j);
        }
    }

    closest
}
/// Scores how well `candidate` covers `query`, as an average over the query's
/// encoded tokens.
///
/// Both strings are passed through `encode`. If either yields no codes the
/// score is `0.0`. Otherwise each query code, in order, is greedily paired
/// with the not-yet-used candidate code that has the lowest
/// `token_distance` strictly below `1.0`; the paired candidate is then
/// removed from the pool. A query code contributes `1.0 - distance` for its
/// pairing, or `0.0` when nothing qualifies. The sum is divided by the number
/// of query codes.
#[must_use]
pub fn similarity(query: &str, candidate: &str) -> f32 {
    let query_codes = encode(query);
    let mut pool: Vec<Code> = encode(candidate);

    if query_codes.is_empty() || pool.is_empty() {
        return 0.0;
    }

    let mut score_sum = 0.0f32;
    for query_code in &query_codes {
        // Keep the first candidate with a strictly smaller distance, starting
        // from the "no match" distance of 1.0.
        let (nearest, slot) = pool.iter().enumerate().fold(
            (1.0f32, None::<usize>),
            |(lowest, chosen), (idx, cand)| {
                let dist = token_distance(query_code, cand);
                if dist < lowest {
                    (dist, Some(idx))
                } else {
                    (lowest, chosen)
                }
            },
        );

        score_sum += 1.0 - nearest;

        // A candidate code may be consumed by at most one query code.
        if let Some(idx) = slot {
            pool.swap_remove(idx);
        }
    }

    score_sum / query_codes.len() as f32
}
/// Reports whether the encodings of `a` and `b` have at least one spectral
/// value in common.
///
/// Both strings are passed through `encode`; the result is `true` as soon as
/// any spectral of any code from `a` equals any spectral of any code from
/// `b`, and `false` otherwise (including when either encoding is empty).
#[must_use]
pub fn matches(a: &str, b: &str) -> bool {
    let left_codes = encode(a);
    let right_codes = encode(b);

    left_codes
        .iter()
        .flat_map(|code| code.spectrals.iter())
        .any(|left_spec| {
            right_codes
                .iter()
                .flat_map(|code| code.spectrals.iter())
                .any(|right_spec| left_spec == right_spec)
        })
}