imessage_database/tables/
handle.rs1use rusqlite::{CachedStatement, Connection, Result, Row};
6use std::collections::{BTreeSet, HashMap, HashSet};
7
8use crate::{
9 error::table::TableError,
10 tables::{
11 diagnostic::{HandleDiagnostic, column_exists, count_query},
12 table::{Cacheable, HANDLE, ME, Table},
13 },
14};
15
/// One row read from the handle table (see the `Table` impl for how it is loaded).
#[derive(Debug)]
pub struct Handle {
    /// Value of the row's `rowid` column.
    pub rowid: i32,
    /// Value of the row's `id` column. Presumably a contact identifier such as a phone
    /// number or email address — confirm against the database schema.
    pub id: String,
    /// Value of the row's `person_centric_id` column. `None` when the column is `NULL`
    /// or could not be read at all (for example, on a table that lacks the column).
    pub person_centric_id: Option<String>,
}
27
28impl Table for Handle {
30 fn from_row(row: &Row) -> Result<Handle> {
31 Ok(Handle {
32 rowid: row.get("rowid")?,
33 id: row.get("id")?,
34 person_centric_id: row.get("person_centric_id").unwrap_or(None),
35 })
36 }
37
38 fn get(db: &'_ Connection) -> Result<CachedStatement<'_>, TableError> {
39 Ok(db.prepare_cached(&format!("SELECT * from {HANDLE}"))?)
40 }
41}
42
43impl Cacheable for Handle {
45 type K = i32;
46 type V = String;
47 fn cache(db: &Connection) -> Result<HashMap<Self::K, Self::V>, TableError> {
63 let mut map = HashMap::new();
64 map.insert(0, ME.to_string());
66
67 let mut statement = Handle::get(db)?;
69
70 for handle in Handle::rows(&mut statement, [])? {
72 let contact = handle?;
73 map.insert(contact.rowid, contact.id);
74 }
75
76 let dupe_contacts = Handle::get_person_id_map(db)?;
78 for contact in dupe_contacts {
79 let (id, new) = contact;
80 map.insert(id, new);
81 }
82
83 Ok(map)
84 }
85}
86
87impl Handle {
89 pub fn dedupe(duplicated_data: &HashMap<i32, String>) -> HashMap<i32, i32> {
108 let mut deduplicated_participants: HashMap<i32, i32> = HashMap::new();
109 let mut participant_to_unique_participant_id: HashMap<String, i32> = HashMap::new();
110
111 let mut unique_participant_identifier = 0;
113
114 let mut sorted_dupes: Vec<(&i32, &String)> = duplicated_data.iter().collect();
116 sorted_dupes.sort_by_key(|(a, _)| *a);
117
118 for (participant_id, participant) in sorted_dupes {
119 if let Some(id) = participant_to_unique_participant_id.get(participant) {
120 deduplicated_participants.insert(*participant_id, *id);
121 } else {
122 participant_to_unique_participant_id
123 .insert(participant.to_owned(), unique_participant_identifier);
124 deduplicated_participants.insert(*participant_id, unique_participant_identifier);
125 unique_participant_identifier += 1;
126 }
127 }
128 deduplicated_participants
129 }
130}
131
132impl Handle {
134 pub fn run_diagnostic(db: &Connection) -> Result<HandleDiagnostic, TableError> {
154 let query = concat!(
155 "SELECT COUNT(DISTINCT person_centric_id) ",
156 "FROM handle ",
157 "WHERE person_centric_id NOT NULL"
158 );
159
160 let handles_with_multiple_ids = if column_exists(db, HANDLE, "person_centric_id")? {
161 Some(count_query(db, query)?)
162 } else {
163 None
164 };
165
166 let all_handles = Self::cache(db)?;
168
169 let unique_handles = Self::dedupe(&all_handles);
171
172 let total_duplicated =
174 all_handles.len() - HashSet::<&i32>::from_iter(unique_handles.values()).len();
175
176 Ok(HandleDiagnostic {
177 total_handles: all_handles.len(),
178 handles_with_multiple_ids,
179 total_duplicated,
180 })
181 }
182}
183
184impl Handle {
186 fn get_person_id_map(db: &Connection) -> Result<HashMap<i32, String>, TableError> {
188 let mut person_to_id: HashMap<String, BTreeSet<String>> = HashMap::new();
189 let mut row_to_id: HashMap<i32, String> = HashMap::new();
190 let mut row_data: Vec<(String, i32, String)> = vec![];
191
192 let query = concat!(
194 "SELECT DISTINCT A.person_centric_id, A.rowid, A.id ",
195 "FROM handle A ",
196 "INNER JOIN handle B ON B.id = A.id ",
197 "WHERE A.person_centric_id NOT NULL ",
198 "ORDER BY A.person_centric_id",
199 );
200 let statement = db.prepare(query);
201
202 if let Ok(mut statement) = statement {
203 let contacts = statement.query_map([], |row| {
205 let person_centric_id: String = row.get(0)?;
206 let rowid: i32 = row.get(1)?;
207 let id: String = row.get(2)?;
208 Ok((person_centric_id, rowid, id))
209 })?;
210
211 for contact in contacts {
212 row_data.push(contact?);
213 }
214
215 for contact in &row_data {
217 let (person_centric_id, _, id) = contact;
218 if let Some(set) = person_to_id.get_mut(person_centric_id) {
219 set.insert(id.to_owned());
220 } else {
221 let mut set = BTreeSet::new();
222 set.insert(id.to_owned());
223 person_to_id.insert(person_centric_id.to_owned(), set);
224 }
225 }
226
227 for contact in &row_data {
229 let (person_centric_id, rowid, _) = contact;
230 let data_to_insert = match person_to_id.get_mut(person_centric_id) {
231 Some(person) => person.iter().cloned().collect::<Vec<String>>().join(" "),
232 None => continue,
233 };
234 row_to_id.insert(rowid.to_owned(), data_to_insert);
235 }
236 }
237
238 Ok(row_to_id)
239 }
240}
241
#[cfg(test)]
mod tests {
    use crate::tables::handle::Handle;
    use rusqlite::Connection;
    use std::collections::{HashMap, HashSet};

    /// Six handles with three distinct ids: rows 1-3 are "A", rows 4-5 are "B", row 6 is "C".
    fn sample_handles() -> HashMap<i32, String> {
        [(1, "A"), (2, "A"), (3, "A"), (4, "B"), (5, "B"), (6, "C")]
            .into_iter()
            .map(|(rowid, id)| (rowid, String::from(id)))
            .collect()
    }

    /// Runs `Handle::dedupe` and returns its pairs in sorted order so runs can be compared.
    fn sorted_dedupe(input: &HashMap<i32, String>) -> Vec<(i32, i32)> {
        let mut pairs = Handle::dedupe(input)
            .into_iter()
            .collect::<Vec<(i32, i32)>>();
        pairs.sort_unstable();
        pairs
    }

    /// Opens an in-memory database, creates the table with `schema`, then runs `seed`.
    fn database_with(schema: &str, seed: &str) -> Connection {
        let db = Connection::open_in_memory().unwrap();
        db.execute(schema, []).unwrap();
        db.execute(seed, []).unwrap();
        db
    }

    #[test]
    fn test_can_dedupe() {
        let output = Handle::dedupe(&sample_handles());

        let expected_deduped_ids: HashSet<i32> = output.values().copied().collect();
        assert_eq!(expected_deduped_ids.len(), 3);
    }

    #[test]
    fn test_same_values() {
        // Three independently built but identical maps must dedupe to identical results,
        // whatever iteration order each `HashMap` happens to have.
        let output_1 = sorted_dedupe(&sample_handles());
        let output_2 = sorted_dedupe(&sample_handles());
        let output_3 = sorted_dedupe(&sample_handles());

        assert_eq!(output_1, output_2);
        assert_eq!(output_1, output_3);
        assert_eq!(output_2, output_3);
    }

    #[test]
    fn diagnostic_omits_person_centric_count_when_column_is_missing() {
        let db = database_with(
            "CREATE TABLE handle (ROWID INTEGER PRIMARY KEY, id TEXT NOT NULL)",
            "INSERT INTO handle (ROWID, id) VALUES (1, 'first'), (2, 'second')",
        );

        let diagnostic = Handle::run_diagnostic(&db).unwrap();

        // Two table rows plus the entry the cache reserves for key 0.
        assert_eq!(diagnostic.total_handles, 3);
        assert_eq!(diagnostic.handles_with_multiple_ids, None);
    }

    #[test]
    fn diagnostic_counts_person_centric_ids_when_column_exists() {
        let db = database_with(
            "CREATE TABLE handle (
                ROWID INTEGER PRIMARY KEY,
                id TEXT NOT NULL,
                person_centric_id TEXT
            )",
            "INSERT INTO handle (ROWID, id, person_centric_id)
             VALUES (1, 'first', 'person-1'),
                    (2, 'second', 'person-1'),
                    (3, 'third', 'person-2')",
        );

        let diagnostic = Handle::run_diagnostic(&db).unwrap();

        assert_eq!(diagnostic.handles_with_multiple_ids, Some(2));
    }
}