imessage_database/tables/
handle.rs1use rusqlite::{CachedStatement, Connection, Result, Row};
6use std::collections::{BTreeSet, HashMap, HashSet};
7
8use crate::{
9 error::table::TableError,
10 tables::{
11 diagnostic::HandleDiagnostic,
12 table::{Cacheable, HANDLE, ME, Table},
13 },
14};
15
/// A single row read from the `handle` table.
///
/// Derives `Clone`, `PartialEq`, and `Eq` in addition to `Debug` so rows can be
/// duplicated and compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Handle {
    /// Value of the row's `rowid` column
    pub rowid: i32,
    /// Value of the row's `id` column
    // presumably a phone number or email address; confirm against the database schema
    pub id: String,
    /// Value of the row's `person_centric_id` column; `None` when the column is
    /// `NULL` or could not be read
    pub person_centric_id: Option<String>,
}
27
28impl Table for Handle {
30 fn from_row(row: &Row) -> Result<Handle> {
31 Ok(Handle {
32 rowid: row.get("rowid")?,
33 id: row.get("id")?,
34 person_centric_id: row.get("person_centric_id").unwrap_or(None),
35 })
36 }
37
38 fn get(db: &'_ Connection) -> Result<CachedStatement<'_>, TableError> {
39 Ok(db.prepare_cached(&format!("SELECT * from {HANDLE}"))?)
40 }
41}
42
43impl Cacheable for Handle {
45 type K = i32;
46 type V = String;
47 fn cache(db: &Connection) -> Result<HashMap<Self::K, Self::V>, TableError> {
62 let mut map = HashMap::new();
64 map.insert(0, ME.to_string());
66
67 let mut statement = Handle::get(db)?;
69
70 let handles = statement.query_map([], |row| Ok(Handle::from_row(row)))?;
72
73 for handle in handles {
75 let contact = Handle::extract(handle)?;
76 map.insert(contact.rowid, contact.id);
77 }
78
79 let dupe_contacts = Handle::get_person_id_map(db)?;
81 for contact in dupe_contacts {
82 let (id, new) = contact;
83 map.insert(id, new);
84 }
85
86 Ok(map)
88 }
89}
90
91impl Handle {
93 pub fn dedupe(duplicated_data: &HashMap<i32, String>) -> HashMap<i32, i32> {
113 let mut deduplicated_participants: HashMap<i32, i32> = HashMap::new();
114 let mut participant_to_unique_participant_id: HashMap<String, i32> = HashMap::new();
115
116 let mut unique_participant_identifier = 0;
118
119 let mut sorted_dupes: Vec<(&i32, &String)> = duplicated_data.iter().collect();
121 sorted_dupes.sort_by(|(a, _), (b, _)| a.cmp(b));
122
123 for (participant_id, participant) in sorted_dupes {
124 if let Some(id) = participant_to_unique_participant_id.get(participant) {
125 deduplicated_participants.insert(*participant_id, *id);
126 } else {
127 participant_to_unique_participant_id
128 .insert(participant.to_owned(), unique_participant_identifier);
129 deduplicated_participants
130 .insert(*participant_id, unique_participant_identifier);
131 unique_participant_identifier += 1;
132 }
133 }
134 deduplicated_participants
135 }
136}
137
138impl Handle {
140 pub fn run_diagnostic(db: &Connection) -> Result<HandleDiagnostic, TableError> {
160 let query = concat!(
161 "SELECT COUNT(DISTINCT person_centric_id) ",
162 "FROM handle ",
163 "WHERE person_centric_id NOT NULL"
164 );
165
166 let handles_with_multiple_ids = if let Ok(mut rows) = db.prepare(query) {
167 rows.query_row([], |r| r.get::<_, i64>(0))
168 .ok()
169 .and_then(|count| usize::try_from(count).ok())
170 .unwrap_or(0)
171 } else {
172 0
173 };
174
175 let all_handles = Self::cache(db)?;
177
178 let unique_handles = Self::dedupe(&all_handles);
180
181 let total_duplicated =
183 all_handles.len() - HashSet::<&i32>::from_iter(unique_handles.values()).len();
184
185 Ok(HandleDiagnostic {
186 total_handles: all_handles.len(),
187 handles_with_multiple_ids,
188 total_duplicated,
189 })
190 }
191}
192
193impl Handle {
195 fn get_person_id_map(db: &Connection) -> Result<HashMap<i32, String>, TableError> {
201 let mut person_to_id: HashMap<String, BTreeSet<String>> = HashMap::new();
202 let mut row_to_id: HashMap<i32, String> = HashMap::new();
203 let mut row_data: Vec<(String, i32, String)> = vec![];
204
205 let query = concat!(
207 "SELECT DISTINCT A.person_centric_id, A.rowid, A.id ",
208 "FROM handle A ",
209 "INNER JOIN handle B ON B.id = A.id ",
210 "WHERE A.person_centric_id NOT NULL ",
211 "ORDER BY A.person_centric_id",
212 );
213 let statement = db.prepare(query);
214
215 if let Ok(mut statement) = statement {
216 let contacts = statement.query_map([], |row| {
218 let person_centric_id: String = row.get(0)?;
219 let rowid: i32 = row.get(1)?;
220 let id: String = row.get(2)?;
221 Ok((person_centric_id, rowid, id))
222 })?;
223
224 for contact in contacts {
225 row_data.push(contact?);
226 }
227
228 for contact in &row_data {
230 let (person_centric_id, _, id) = contact;
231 if let Some(set) = person_to_id.get_mut(person_centric_id) {
232 set.insert(id.to_owned());
233 } else {
234 let mut set = BTreeSet::new();
235 set.insert(id.to_owned());
236 person_to_id.insert(person_centric_id.to_owned(), set);
237 }
238 }
239
240 for contact in &row_data {
242 let (person_centric_id, rowid, _) = contact;
243 let data_to_insert = match person_to_id.get_mut(person_centric_id) {
244 Some(person) => person.iter().cloned().collect::<Vec<String>>().join(" "),
245 None => continue,
246 };
247 row_to_id.insert(rowid.to_owned(), data_to_insert);
248 }
249 }
250
251 Ok(row_to_id)
252 }
253}
254
#[cfg(test)]
mod tests {
    use crate::tables::handle::Handle;
    use std::collections::{HashMap, HashSet};

    /// Six handle rows spread across three distinct identifiers: "A", "B", and "C".
    fn sample_handles() -> HashMap<i32, String> {
        [(1, "A"), (2, "A"), (3, "A"), (4, "B"), (5, "B"), (6, "C")]
            .iter()
            .map(|&(rowid, contact)| (rowid, String::from(contact)))
            .collect()
    }

    #[test]
    fn test_can_dedupe() {
        let output = Handle::dedupe(&sample_handles());

        // Three distinct identifiers must collapse to three distinct IDs
        let expected_deduped_ids: HashSet<i32> = output.values().copied().collect();
        assert_eq!(expected_deduped_ids.len(), 3);
    }

    #[test]
    // Deduplicating equal inputs built independently must give identical results
    fn test_same_values() {
        let sorted_output = |input: &HashMap<i32, String>| {
            let mut pairs: Vec<(i32, i32)> = Handle::dedupe(input).into_iter().collect();
            pairs.sort_unstable();
            pairs
        };

        let output_1 = sorted_output(&sample_handles());
        let output_2 = sorted_output(&sample_handles());
        let output_3 = sorted_output(&sample_handles());

        assert_eq!(output_1, output_2);
        assert_eq!(output_1, output_3);
        assert_eq!(output_2, output_3);
    }
}