use rusqlite::{CachedStatement, Connection, Result, Row};
use std::collections::{BTreeSet, HashMap, HashSet};
use crate::{
error::table::TableError,
tables::{
diagnostic::{HandleDiagnostic, column_exists, count_query},
table::{Cacheable, HANDLE, ME, Table},
},
};
/// One row of the table named by `HANDLE`, as materialised by `Handle::from_row`.
#[derive(Debug)]
pub struct Handle {
/// Value of the row's `rowid` column.
pub rowid: i32,
/// Value of the row's `id` column.
pub id: String,
/// Value of the row's `person_centric_id` column; `None` when the column is
/// NULL or could not be read (`from_row` falls back to `None` on any error).
pub person_centric_id: Option<String>,
}
impl Table for Handle {
    /// Builds a [`Handle`] from a result row.
    ///
    /// `rowid` and `id` are mandatory: a failure to read either is returned to
    /// the caller. `person_centric_id` is best-effort: any read failure is
    /// deliberately collapsed into `None` instead of being reported.
    fn from_row(row: &Row) -> Result<Handle> {
        let rowid: i32 = row.get("rowid")?;
        let id: String = row.get("id")?;
        // Any error here is intentionally swallowed and treated as "no value".
        let person_centric_id: Option<String> =
            row.get("person_centric_id").unwrap_or_default();

        Ok(Handle {
            rowid,
            id,
            person_centric_id,
        })
    }

    /// Prepares (and caches on the connection) the statement that selects every
    /// column of every row in the `HANDLE` table.
    ///
    /// # Errors
    ///
    /// Returns a [`TableError`] when the statement cannot be prepared.
    fn get(db: &'_ Connection) -> Result<CachedStatement<'_>, TableError> {
        let sql = format!("SELECT * from {HANDLE}");
        let statement = db.prepare_cached(&sql)?;
        Ok(statement)
    }
}
impl Cacheable for Handle {
    type K = i32;
    type V = String;

    /// Builds a lookup from handle `rowid` to the label stored for that handle.
    ///
    /// The map is filled in three layers, later layers overwriting earlier ones:
    ///
    /// 1. key `0` is mapped to `ME`;
    /// 2. every row of the handle table contributes `rowid -> id`;
    /// 3. every entry returned by `Handle::get_person_id_map` replaces the
    ///    label for its `rowid`.
    ///
    /// # Errors
    ///
    /// Returns a [`TableError`] when the statement cannot be prepared or run,
    /// when a row cannot be converted, or when `get_person_id_map` fails.
    fn cache(db: &Connection) -> Result<HashMap<Self::K, Self::V>, TableError> {
        let mut labels: HashMap<Self::K, Self::V> = HashMap::from([(0, ME.to_string())]);

        let mut statement = Self::get(db)?;
        for row in Self::rows(&mut statement, [])? {
            let handle = row?;
            labels.insert(handle.rowid, handle.id);
        }

        // Applied last so that the merged labels win over the per-row `id`.
        labels.extend(Self::get_person_id_map(db)?);

        Ok(labels)
    }
}
impl Handle {
    /// Collapses handles that carry the same label onto one shared identifier.
    ///
    /// `duplicated_data` maps a handle's `rowid` to its label. The result maps
    /// each of those `rowid`s to a new identifier such that two `rowid`s get
    /// the same identifier exactly when their labels are equal.
    ///
    /// Identifiers start at `0` and are handed out in ascending `rowid` order,
    /// so the output does not depend on the iteration order of the input map.
    pub fn dedupe(duplicated_data: &HashMap<i32, String>) -> HashMap<i32, i32> {
        // Walk the input by ascending rowid to make identifier assignment
        // deterministic; keys are unique, so an unstable sort is sufficient.
        let mut by_rowid: Vec<(i32, &str)> = duplicated_data
            .iter()
            .map(|(&rowid, label)| (rowid, label.as_str()))
            .collect();
        by_rowid.sort_unstable_by_key(|&(rowid, _)| rowid);

        let mut id_for_label: HashMap<&str, i32> = HashMap::new();
        let mut next_unique_id = 0;
        let mut deduped: HashMap<i32, i32> = HashMap::new();

        for (rowid, label) in by_rowid {
            // The first sighting of a label claims the next free identifier.
            let unique_id = *id_for_label.entry(label).or_insert_with(|| {
                let fresh = next_unique_id;
                next_unique_id += 1;
                fresh
            });
            deduped.insert(rowid, unique_id);
        }

        deduped
    }
}
impl Handle {
    /// Collects summary statistics about the handle table.
    ///
    /// The returned [`HandleDiagnostic`] contains:
    ///
    /// * `total_handles` – the number of entries in `Self::cache(db)`;
    /// * `handles_with_multiple_ids` – the number of distinct non-NULL
    ///   `person_centric_id` values, or `None` when the table has no
    ///   `person_centric_id` column;
    /// * `total_duplicated` – `total_handles` minus the number of distinct
    ///   identifiers produced by `Self::dedupe` over that cache.
    ///
    /// # Errors
    ///
    /// Returns a [`TableError`] when the column check, the count query, or
    /// building the cache fails.
    pub fn run_diagnostic(db: &Connection) -> Result<HandleDiagnostic, TableError> {
        const DISTINCT_PERSON_IDS: &str =
            "SELECT COUNT(DISTINCT person_centric_id) FROM handle WHERE person_centric_id NOT NULL";

        // Only run the count when the column is present in this database.
        let handles_with_multiple_ids = match column_exists(db, HANDLE, "person_centric_id")? {
            true => Some(count_query(db, DISTINCT_PERSON_IDS)?),
            false => None,
        };

        let all_handles = Self::cache(db)?;
        let total_handles = all_handles.len();

        let distinct_ids: HashSet<i32> = Self::dedupe(&all_handles).values().copied().collect();
        let total_duplicated = total_handles - distinct_ids.len();

        Ok(HandleDiagnostic {
            total_handles,
            handles_with_multiple_ids,
            total_duplicated,
        })
    }
}
impl Handle {
    /// Maps each handle `rowid` that has a non-NULL `person_centric_id` to a
    /// combined label for everything sharing that `person_centric_id`.
    ///
    /// The label is the set of distinct `id` values found for the
    /// `person_centric_id`, in ascending string order, joined by single spaces.
    ///
    /// If the query cannot be prepared the function returns an empty map
    /// rather than an error (presumably to tolerate databases without the
    /// `person_centric_id` column — confirm against callers).
    ///
    /// # Errors
    ///
    /// Returns a [`TableError`] when the prepared statement fails to execute
    /// or a result row cannot be read.
    fn get_person_id_map(db: &Connection) -> Result<HashMap<i32, String>, TableError> {
        const QUERY: &str = concat!(
            "SELECT DISTINCT A.person_centric_id, A.rowid, A.id ",
            "FROM handle A ",
            "INNER JOIN handle B ON B.id = A.id ",
            "WHERE A.person_centric_id NOT NULL ",
            "ORDER BY A.person_centric_id",
        );

        // Best-effort: a statement that cannot be prepared means "nothing to merge".
        let mut statement = match db.prepare(QUERY) {
            Ok(prepared) => prepared,
            Err(_) => return Ok(HashMap::new()),
        };

        // Pull every (person_centric_id, rowid, id) triple; stop at the first bad row.
        let rows: Vec<(String, i32, String)> = statement
            .query_map([], |row| {
                Ok((
                    row.get::<_, String>(0)?,
                    row.get::<_, i32>(1)?,
                    row.get::<_, String>(2)?,
                ))
            })?
            .collect::<Result<Vec<_>, _>>()?;

        // Group the distinct ids under their person; the BTreeSet keeps them sorted.
        let mut ids_by_person: HashMap<&str, BTreeSet<&str>> = HashMap::new();
        for (person, _, id) in &rows {
            ids_by_person
                .entry(person.as_str())
                .or_default()
                .insert(id.as_str());
        }

        // Render each person's label once instead of once per row.
        let label_by_person: HashMap<&str, String> = ids_by_person
            .into_iter()
            .map(|(person, ids)| (person, ids.into_iter().collect::<Vec<&str>>().join(" ")))
            .collect();

        let merged = rows
            .iter()
            .filter_map(|(person, rowid, _)| {
                label_by_person
                    .get(person.as_str())
                    .map(|label| (*rowid, label.clone()))
            })
            .collect();

        Ok(merged)
    }
}
#[cfg(test)]
mod tests {
    use crate::tables::handle::Handle;
    use rusqlite::Connection;
    use std::collections::{HashMap, HashSet};

    /// Builds a fresh map of six handle rows that share three distinct labels.
    fn sample_handles() -> HashMap<i32, String> {
        [(1, "A"), (2, "A"), (3, "A"), (4, "B"), (5, "B"), (6, "C")]
            .iter()
            .map(|&(rowid, label)| (rowid, String::from(label)))
            .collect()
    }

    /// Runs `dedupe` and returns its entries sorted so they can be compared.
    fn sorted_dedupe(input: &HashMap<i32, String>) -> Vec<(i32, i32)> {
        let mut pairs = Handle::dedupe(input)
            .into_iter()
            .collect::<Vec<(i32, i32)>>();
        pairs.sort_unstable();
        pairs
    }

    /// Opens an in-memory database and runs the given DDL and seed statements.
    fn database_with(create_sql: &str, insert_sql: &str) -> Connection {
        let db = Connection::open_in_memory().unwrap();
        db.execute(create_sql, []).unwrap();
        db.execute(insert_sql, []).unwrap();
        db
    }

    #[test]
    fn test_can_dedupe() {
        let output = Handle::dedupe(&sample_handles());
        let expected_deduped_ids: HashSet<i32> = output.values().copied().collect();
        assert_eq!(expected_deduped_ids.len(), 3);
    }

    #[test]
    fn test_same_values() {
        // Three separately constructed maps with identical content must yield
        // identical results regardless of their internal iteration order.
        let output_1 = sorted_dedupe(&sample_handles());
        let output_2 = sorted_dedupe(&sample_handles());
        let output_3 = sorted_dedupe(&sample_handles());

        assert_eq!(output_1, output_2);
        assert_eq!(output_1, output_3);
        assert_eq!(output_2, output_3);
    }

    #[test]
    fn diagnostic_omits_person_centric_count_when_column_is_missing() {
        let db = database_with(
            "CREATE TABLE handle (ROWID INTEGER PRIMARY KEY, id TEXT NOT NULL)",
            "INSERT INTO handle (ROWID, id) VALUES (1, 'first'), (2, 'second')",
        );

        let diagnostic = Handle::run_diagnostic(&db).unwrap();

        // Two inserted rows plus the entry the cache adds for key 0.
        assert_eq!(diagnostic.total_handles, 3);
        assert_eq!(diagnostic.handles_with_multiple_ids, None);
    }

    #[test]
    fn diagnostic_counts_person_centric_ids_when_column_exists() {
        let db = database_with(
            "CREATE TABLE handle (\nROWID INTEGER PRIMARY KEY,\nid TEXT NOT NULL,\nperson_centric_id TEXT\n)",
            "INSERT INTO handle (ROWID, id, person_centric_id)\nVALUES (1, 'first', 'person-1'),\n(2, 'second', 'person-1'),\n(3, 'third', 'person-2')",
        );

        let diagnostic = Handle::run_diagnostic(&db).unwrap();

        assert_eq!(diagnostic.handles_with_multiple_ids, Some(2));
    }
}