use std::collections::HashMap;
use rusqlite::params;
use strsim::jaro_winkler;
use tracing::debug;
use crate::core::config::TeamConfig;
use crate::core::db::Database;
/// Default minimum Jaro-Winkler similarity an author must reach against a
/// team member's name or email before the fuzzy step of
/// `IdentityResolver::resolve` accepts the match.
///
/// Both constructors start from this value; `IdentityResolver::with_threshold`
/// overrides it.
pub const DEFAULT_SIMILARITY_THRESHOLD: f64 = 0.85;
/// Maps the name/email pairs found on commits to canonical team identities.
///
/// Resolution tries exact alias lookups first and falls back to fuzzy string
/// matching against the known members.
pub struct IdentityResolver {
/// Lowercased alias (a name or an email address) -> canonical member name.
aliases: HashMap<String, String>,
/// Known canonical identities as `(name, email)` pairs; the email is empty
/// when the alias-map constructor found no address for that person.
members: Vec<(String, String)>,
/// Minimum similarity score a fuzzy candidate must reach to be accepted.
threshold: f64,
}
impl IdentityResolver {
    /// Builds a resolver from an optional team configuration.
    ///
    /// Each team member contributes one canonical `(name, email)` identity.
    /// Exact-lookup keys are lowercased and registered in this order, a later
    /// registration overwriting an earlier one for the same key: the team-wide
    /// alias table first, then, member by member, the member's own aliases and
    /// email. The member's own name is registered too, but only if nothing
    /// has claimed that key yet, so explicit aliases always take precedence.
    ///
    /// Empty aliases, emails and names are never registered; otherwise every
    /// author with a missing email would collapse onto one member.
    ///
    /// With `None` the resolver knows nobody and `resolve` returns its input.
    pub fn new(team: Option<&TeamConfig>) -> Self {
        let mut aliases: HashMap<String, String> = HashMap::new();
        let mut members: Vec<(String, String)> = Vec::new();

        if let Some(team) = team {
            for (alias, canonical) in &team.aliases {
                Self::register_alias(&mut aliases, alias, canonical);
            }
            for member in &team.members {
                members.push((member.name.clone(), member.email.clone()));
                for alias in &member.aliases {
                    Self::register_alias(&mut aliases, alias, &member.name);
                }
                Self::register_alias(&mut aliases, &member.email, &member.name);

                // Keep exact-name matches independent of the fuzzy threshold,
                // mirroring `from_alias_map`, without displacing an alias
                // that already owns this key.
                let name_key = member.name.to_lowercase();
                if !name_key.is_empty() {
                    aliases
                        .entry(name_key)
                        .or_insert_with(|| member.name.clone());
                }
            }
        }

        Self {
            aliases,
            members,
            threshold: DEFAULT_SIMILARITY_THRESHOLD,
        }
    }

    /// Builds a resolver from a `canonical name -> aliases` map.
    ///
    /// The first alias containing `'@'` becomes the person's canonical email
    /// (empty when there is none). The canonical name and every non-empty
    /// alias are registered, lowercased, as exact-lookup keys.
    ///
    /// Entries are processed in sorted order of canonical name rather than in
    /// hash-map order, so that alias collisions between two people and ties
    /// in fuzzy matching are resolved the same way on every run.
    pub fn from_alias_map(map: &HashMap<String, Vec<String>>) -> Self {
        let mut entries: Vec<(&String, &Vec<String>)> = map.iter().collect();
        entries.sort_unstable_by(|a, b| a.0.cmp(b.0));

        let mut aliases: HashMap<String, String> = HashMap::new();
        let mut members: Vec<(String, String)> = Vec::with_capacity(entries.len());

        for (canon_name, alias_list) in entries {
            let canon_email = alias_list
                .iter()
                .find(|alias| alias.contains('@'))
                .cloned()
                .unwrap_or_default();

            Self::register_alias(&mut aliases, canon_name, canon_name);
            // The canonical email is itself one of the aliases, so this loop
            // registers it as well.
            for alias in alias_list {
                Self::register_alias(&mut aliases, alias, canon_name);
            }
            members.push((canon_name.clone(), canon_email));
        }

        Self {
            aliases,
            members,
            threshold: DEFAULT_SIMILARITY_THRESHOLD,
        }
    }

    /// Builds a resolver from the application configuration.
    ///
    /// Uses the map returned by `Config::resolved_aliases` when it is
    /// non-empty and falls back to the `team` section otherwise.
    pub fn from_config(config: &crate::core::config::Config) -> Self {
        let map = config.resolved_aliases();
        if map.is_empty() {
            Self::new(config.team.as_ref())
        } else {
            Self::from_alias_map(&map)
        }
    }

    /// Returns the resolver with its fuzzy-match threshold replaced.
    ///
    /// A candidate is accepted when its similarity score is greater than or
    /// equal to `threshold`.
    pub fn with_threshold(mut self, threshold: f64) -> Self {
        self.threshold = threshold;
        self
    }

    /// Resolves an author's `name` and `email` to a canonical identity.
    ///
    /// Steps, first hit wins:
    /// 1. exact, case-insensitive lookup of `email` among the aliases;
    /// 2. exact, case-insensitive lookup of `name` among the aliases;
    /// 3. fuzzy match: the member whose name or email has the highest
    ///    Jaro-Winkler similarity to the input, provided it reaches the
    ///    threshold (the earliest member wins a tie);
    /// 4. otherwise the input is returned unchanged.
    ///
    /// Empty strings never match anything: a missing email or name carries no
    /// identifying information.
    pub fn resolve(&self, name: &str, email: &str) -> (String, String) {
        let email_lc = email.to_lowercase();
        let name_lc = name.to_lowercase();

        let exact = self
            .lookup_alias(&email_lc)
            .or_else(|| self.lookup_alias(&name_lc));
        if let Some(identity) = exact {
            return identity;
        }

        let mut best: Option<(f64, &(String, String))> = None;
        for candidate in &self.members {
            let by_name = Self::similarity(&name_lc, &candidate.0.to_lowercase());
            let by_email = Self::similarity(&email_lc, &candidate.1.to_lowercase());
            let score = by_name.max(by_email);
            // Strict `>` keeps the earliest member on equal scores.
            let improves = best.map_or(true, |(top, _)| score > top);
            if score >= self.threshold && improves {
                best = Some((score, candidate));
            }
        }

        if let Some((score, m)) = best {
            debug!(score, member = %m.0, "fuzzy identity match");
            return (m.0.clone(), m.1.clone());
        }

        (name.to_string(), email.to_string())
    }

    /// Resolves the author and makes sure a matching row exists in the
    /// `authors` table, returning that row's `id`.
    ///
    /// The row is keyed by canonical email: on a conflict the stored
    /// canonical name is overwritten with the freshly resolved one.
    ///
    /// NOTE(review): identities whose canonical email is empty (possible for
    /// resolvers built by `from_alias_map`) all share the same key here and
    /// therefore the same row; confirm whether that is intended.
    ///
    /// # Errors
    ///
    /// Propagates any error raised by the insert or by the id lookup.
    pub fn upsert_author(
        &self,
        db: &Database,
        name: &str,
        email: &str,
    ) -> crate::core::Result<i64> {
        let (canon_name, canon_email) = self.resolve(name, email);
        let conn = db.connection();
        conn.execute(
            "INSERT INTO authors (canonical_name, canonical_email, aliases) \
             VALUES (?1, ?2, '[]') \
             ON CONFLICT(canonical_email) DO UPDATE SET canonical_name = excluded.canonical_name",
            params![canon_name, canon_email],
        )?;
        // Read the id back separately: after an upsert that took the UPDATE
        // path the row is an existing one, not a newly inserted one.
        let id: i64 = conn.query_row(
            "SELECT id FROM authors WHERE canonical_email = ?1",
            params![canon_email],
            |row| row.get(0),
        )?;
        Ok(id)
    }

    /// Returns the first member whose name equals `name`, ignoring case.
    ///
    /// Case is folded with `to_lowercase`, the same folding used for alias
    /// keys, so non-ASCII names match consistently in both places.
    fn find_member_by_name(&self, name: &str) -> Option<(String, String)> {
        let wanted = name.to_lowercase();
        self.members
            .iter()
            .find(|(member_name, _)| member_name.to_lowercase() == wanted)
            .cloned()
    }

    /// Looks up an already-lowercased key among the aliases and returns the
    /// member it points to, if that member exists.
    fn lookup_alias(&self, key_lc: &str) -> Option<(String, String)> {
        let canonical = self.aliases.get(key_lc)?;
        self.find_member_by_name(canonical)
    }

    /// Registers `alias` (lowercased) as an exact-lookup key for `canonical`,
    /// replacing any previous owner of the key. Empty aliases are ignored.
    fn register_alias(aliases: &mut HashMap<String, String>, alias: &str, canonical: &str) {
        if alias.is_empty() {
            return;
        }
        aliases.insert(alias.to_lowercase(), canonical.to_owned());
    }

    /// Jaro-Winkler similarity that scores 0.0 whenever either side is empty.
    ///
    /// The raw metric rates two empty strings as identical, which would turn
    /// "email missing on both sides" into a perfect match.
    fn similarity(a: &str, b: &str) -> f64 {
        if a.is_empty() || b.is_empty() {
            0.0
        } else {
            jaro_winkler(a, b)
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::config::{TeamConfig, TeamMember};
    use std::collections::HashMap;

    /// A one-person team: Bob Smith, reachable through a secondary email
    /// address and through the team-wide nickname "bobby".
    fn make_team() -> TeamConfig {
        let bob = TeamMember {
            name: "Bob Smith".into(),
            email: "bob@example.com".into(),
            aliases: vec!["bsmith@example.com".into()],
        };
        let mut aliases = HashMap::new();
        aliases.insert("bobby".into(), "Bob Smith".into());
        TeamConfig {
            members: vec![bob],
            aliases,
        }
    }

    /// Resolver over the sample team with the default threshold.
    fn team_resolver() -> IdentityResolver {
        IdentityResolver::new(Some(&make_team()))
    }

    /// Shorthand for building the owned tuple `resolve` returns.
    fn identity(name: &str, email: &str) -> (String, String) {
        (name.to_string(), email.to_string())
    }

    #[test]
    fn exact_email_alias_match() {
        let resolved = team_resolver().resolve("Whoever", "bsmith@example.com");
        assert_eq!(resolved, identity("Bob Smith", "bob@example.com"));
    }

    #[test]
    fn exact_name_alias_match() {
        let resolved = team_resolver().resolve("bobby", "x@y.com");
        assert_eq!(resolved, identity("Bob Smith", "bob@example.com"));
    }

    #[test]
    fn fuzzy_match_canonical_name() {
        // Only the name is checked; a one-letter typo must still resolve.
        let (resolved_name, _) = team_resolver().resolve("Bob Smyth", "unknown@elsewhere.com");
        assert_eq!(resolved_name, "Bob Smith");
    }

    #[test]
    fn no_match_returns_input() {
        let resolved = team_resolver().resolve("Zelda Q", "zelda@nowhere.test");
        assert_eq!(resolved, identity("Zelda Q", "zelda@nowhere.test"));
    }

    #[test]
    fn empty_team_passthrough() {
        let resolved = IdentityResolver::new(None).resolve("Anyone", "anyone@x.com");
        assert_eq!(resolved, identity("Anyone", "anyone@x.com"));
    }
}