imessage_database/tables/
handle.rs1use rusqlite::{CachedStatement, Connection, Error, Result, Row};
6use std::collections::{BTreeSet, HashMap, HashSet};
7
8use crate::{
9 error::table::TableError,
10 tables::table::{Cacheable, Deduplicate, Diagnostic, HANDLE, ME, Table},
11 util::output::{done_processing, processing},
12};
13
/// One row read from the `handle` table.
#[derive(Debug)]
pub struct Handle {
    /// Value of the row's `rowid` column.
    pub rowid: i32,
    /// Value of the row's `id` column.
    ///
    /// NOTE(review): presumably the contact identifier (phone number or email
    /// address) - confirm against the database schema.
    pub id: String,
    /// Value of the row's `person_centric_id` column; `None` when the value is
    /// NULL or the column could not be read.
    pub person_centric_id: Option<String>,
}
25
26impl Table for Handle {
28 fn from_row(row: &Row) -> Result<Handle> {
29 Ok(Handle {
30 rowid: row.get("rowid")?,
31 id: row.get("id")?,
32 person_centric_id: row.get("person_centric_id").unwrap_or(None),
33 })
34 }
35
36 fn get(db: &'_ Connection) -> Result<CachedStatement<'_>, TableError> {
37 Ok(db.prepare_cached(&format!("SELECT * from {HANDLE}"))?)
38 }
39
40 fn extract(handle: Result<Result<Self, Error>, Error>) -> Result<Self, TableError> {
41 match handle {
42 Ok(Ok(handle)) => Ok(handle),
43 Err(why) | Ok(Err(why)) => Err(TableError::QueryError(why)),
44 }
45 }
46}
47
48impl Cacheable for Handle {
50 type K = i32;
51 type V = String;
52 fn cache(db: &Connection) -> Result<HashMap<Self::K, Self::V>, TableError> {
67 let mut map = HashMap::new();
69 map.insert(0, ME.to_string());
71
72 let mut statement = Handle::get(db)?;
74
75 let handles = statement.query_map([], |row| Ok(Handle::from_row(row)))?;
77
78 for handle in handles {
80 let contact = Handle::extract(handle)?;
81 map.insert(contact.rowid, contact.id);
82 }
83
84 let dupe_contacts = Handle::get_person_id_map(db)?;
86 for contact in dupe_contacts {
87 let (id, new) = contact;
88 map.insert(id, new);
89 }
90
91 Ok(map)
93 }
94}
95
96impl Deduplicate for Handle {
98 type T = String;
99
100 fn dedupe(duplicated_data: &HashMap<i32, Self::T>) -> HashMap<i32, i32> {
120 let mut deduplicated_participants: HashMap<i32, i32> = HashMap::new();
121 let mut participant_to_unique_participant_id: HashMap<Self::T, i32> = HashMap::new();
122
123 let mut unique_participant_identifier = 0;
125
126 let mut sorted_dupes: Vec<(&i32, &Self::T)> = duplicated_data.iter().collect();
128 sorted_dupes.sort_by(|(a, _), (b, _)| a.cmp(b));
129
130 for (participant_id, participant) in sorted_dupes {
131 if let Some(id) = participant_to_unique_participant_id.get(participant) {
132 deduplicated_participants.insert(participant_id.to_owned(), id.to_owned());
133 } else {
134 participant_to_unique_participant_id
135 .insert(participant.to_owned(), unique_participant_identifier);
136 deduplicated_participants
137 .insert(participant_id.to_owned(), unique_participant_identifier);
138 unique_participant_identifier += 1;
139 }
140 }
141 deduplicated_participants
142 }
143}
144
145impl Diagnostic for Handle {
147 fn run_diagnostic(db: &Connection) -> Result<(), TableError> {
169 let query = concat!(
170 "SELECT COUNT(DISTINCT person_centric_id) ",
171 "FROM handle ",
172 "WHERE person_centric_id NOT NULL"
173 );
174
175 if let Ok(mut rows) = db.prepare(query) {
176 processing();
177
178 let handles_with_identical_ids: i32 = rows.query_row([], |r| r.get(0)).unwrap_or(0);
180
181 let all_handles = Self::cache(db)?;
183
184 let unique_handles = Self::dedupe(&all_handles);
186
187 let total_dupes =
189 all_handles.len() - HashSet::<&i32>::from_iter(unique_handles.values()).len();
190
191 done_processing();
192
193 println!("Handle diagnostic data:");
194 println!(" Total handles: {}", all_handles.len());
195 if handles_with_identical_ids > 0 || total_dupes > 0 {
196 if handles_with_identical_ids > 0 {
197 println!(" Handles with more than one ID: {handles_with_identical_ids}");
198 }
199 if total_dupes > 0 {
200 println!(" Total duplicated handles: {total_dupes}");
201 }
202 }
203 }
204
205 Ok(())
206 }
207}
208
209impl Handle {
211 fn get_person_id_map(db: &Connection) -> Result<HashMap<i32, String>, TableError> {
217 let mut person_to_id: HashMap<String, BTreeSet<String>> = HashMap::new();
218 let mut row_to_id: HashMap<i32, String> = HashMap::new();
219 let mut row_data: Vec<(String, i32, String)> = vec![];
220
221 let query = concat!(
223 "SELECT DISTINCT A.person_centric_id, A.rowid, A.id ",
224 "FROM handle A ",
225 "INNER JOIN handle B ON B.id = A.id ",
226 "WHERE A.person_centric_id NOT NULL ",
227 "ORDER BY A.person_centric_id",
228 );
229 let statement = db.prepare(query);
230
231 if let Ok(mut statement) = statement {
232 let contacts = statement.query_map([], |row| {
234 let person_centric_id: String = row.get(0)?;
235 let rowid: i32 = row.get(1)?;
236 let id: String = row.get(2)?;
237 Ok((person_centric_id, rowid, id))
238 })?;
239
240 for contact in contacts {
241 row_data.push(contact?);
242 }
243
244 for contact in &row_data {
246 let (person_centric_id, _, id) = contact;
247 if let Some(set) = person_to_id.get_mut(person_centric_id) {
248 set.insert(id.to_owned());
249 } else {
250 let mut set = BTreeSet::new();
251 set.insert(id.to_owned());
252 person_to_id.insert(person_centric_id.to_owned(), set);
253 }
254 }
255
256 for contact in &row_data {
258 let (person_centric_id, rowid, _) = contact;
259 let data_to_insert = match person_to_id.get_mut(person_centric_id) {
260 Some(person) => person.iter().cloned().collect::<Vec<String>>().join(" "),
261 None => panic!("Attempted to resolve contact with no person_centric_id!"),
262 };
263 row_to_id.insert(rowid.to_owned(), data_to_insert);
264 }
265 }
266
267 Ok(row_to_id)
268 }
269}
270
#[cfg(test)]
mod tests {
    use crate::tables::{handle::Handle, table::Deduplicate};
    use std::collections::{HashMap, HashSet};

    /// Build a fresh map of six handles spread over three distinct values:
    /// "A" three times, "B" twice and "C" once.
    fn sample_handles() -> HashMap<i32, String> {
        [(1, "A"), (2, "A"), (3, "A"), (4, "B"), (5, "B"), (6, "C")]
            .iter()
            .map(|&(rowid, contact)| (rowid, String::from(contact)))
            .collect()
    }

    /// Deduplicate `input` and return the resulting pairs in sorted order so
    /// that separate runs can be compared directly.
    fn sorted_dedupe(input: &HashMap<i32, String>) -> Vec<(i32, i32)> {
        let mut pairs = Handle::dedupe(input)
            .into_iter()
            .collect::<Vec<(i32, i32)>>();
        pairs.sort_unstable();
        pairs
    }

    #[test]
    fn test_can_dedupe() {
        let output = Handle::dedupe(&sample_handles());

        // Three distinct values must collapse to three distinct identifiers.
        let distinct_ids: HashSet<i32> = output.values().copied().collect();
        assert_eq!(distinct_ids.len(), 3);
    }

    #[test]
    fn test_same_values() {
        // Each input is a separately constructed map with identical contents;
        // the deduplicated output must not vary between them.
        let input_1 = sample_handles();
        let input_2 = sample_handles();
        let input_3 = sample_handles();

        let output_1 = sorted_dedupe(&input_1);
        let output_2 = sorted_dedupe(&input_2);
        let output_3 = sorted_dedupe(&input_3);

        assert_eq!(output_1, output_2);
        assert_eq!(output_1, output_3);
        assert_eq!(output_2, output_3);
    }
}