imessage_database/tables/
handle.rs

1/*!
2 This module represents common (but not all) columns in the `handle` table.
3*/
4
5use rusqlite::{CachedStatement, Connection, Error, Result, Row};
6use std::collections::{BTreeSet, HashMap, HashSet};
7
8use crate::{
9    error::table::TableError,
10    tables::table::{Cacheable, Deduplicate, Diagnostic, HANDLE, ME, Table},
11    util::output::{done_processing, processing},
12};
13
14// MARK: Handle
/// Represents a single row in the `handle` table.
///
/// The type derives `Clone`, `PartialEq`, and `Eq` in addition to `Debug` so that
/// rows can be duplicated and compared directly, e.g. in tests and in collections.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Handle {
    /// The unique identifier for the handle in the database
    pub rowid: i32,
    /// Identifier for a contact, i.e. a phone number or email address
    pub id: String,
    /// Field used to disambiguate divergent handles that represent the same contact;
    /// `None` when the value is missing or could not be read from the row
    pub person_centric_id: Option<String>,
}
25
26// MARK: Table
27impl Table for Handle {
28    fn from_row(row: &Row) -> Result<Handle> {
29        Ok(Handle {
30            rowid: row.get("rowid")?,
31            id: row.get("id")?,
32            person_centric_id: row.get("person_centric_id").unwrap_or(None),
33        })
34    }
35
36    fn get(db: &'_ Connection) -> Result<CachedStatement<'_>, TableError> {
37        Ok(db.prepare_cached(&format!("SELECT * from {HANDLE}"))?)
38    }
39
40    fn extract(handle: Result<Result<Self, Error>, Error>) -> Result<Self, TableError> {
41        match handle {
42            Ok(Ok(handle)) => Ok(handle),
43            Err(why) | Ok(Err(why)) => Err(TableError::QueryError(why)),
44        }
45    }
46}
47
48// MARK: Cache
49impl Cacheable for Handle {
50    type K = i32;
51    type V = String;
52    /// Generate a `HashMap` for looking up contacts by their IDs, collapsing
53    /// duplicate contacts to the same ID String regardless of service
54    ///
55    /// # Example:
56    ///
57    /// ```
58    /// use imessage_database::util::dirs::default_db_path;
59    /// use imessage_database::tables::table::{Cacheable, get_connection};
60    /// use imessage_database::tables::handle::Handle;
61    ///
62    /// let db_path = default_db_path();
63    /// let conn = get_connection(&db_path).unwrap();
64    /// let chatrooms = Handle::cache(&conn);
65    /// ```
66    fn cache(db: &Connection) -> Result<HashMap<Self::K, Self::V>, TableError> {
67        // Create cache for user IDs
68        let mut map = HashMap::new();
69        // Handle ID 0 is self in group chats
70        map.insert(0, ME.to_string());
71
72        // Create query
73        let mut statement = Handle::get(db)?;
74
75        // Execute query to build the Handles
76        let handles = statement.query_map([], |row| Ok(Handle::from_row(row)))?;
77
78        // Iterate over the handles and update the map
79        for handle in handles {
80            let contact = Handle::extract(handle)?;
81            map.insert(contact.rowid, contact.id);
82        }
83
84        // Condense contacts that share person_centric_id so their IDs map to the same strings
85        let dupe_contacts = Handle::get_person_id_map(db)?;
86        for contact in dupe_contacts {
87            let (id, new) = contact;
88            map.insert(id, new);
89        }
90
91        // Done!
92        Ok(map)
93    }
94}
95
96// MARK: Dedupe
97impl Deduplicate for Handle {
98    type T = String;
99
100    /// Given the initial set of duplicated handles, deduplicate them
101    ///
102    /// This returns a new hashmap that maps the real handle ID to a new deduplicated unique handle ID
103    /// that represents a single handle for all of the deduplicate handles.
104    ///
105    /// Assuming no new handles have been written to the database, deduplicated data is deterministic across runs.
106    ///
107    /// # Example:
108    ///
109    /// ```
110    /// use imessage_database::util::dirs::default_db_path;
111    /// use imessage_database::tables::table::{Cacheable, Deduplicate, get_connection};
112    /// use imessage_database::tables::handle::Handle;
113    ///
114    /// let db_path = default_db_path();
115    /// let conn = get_connection(&db_path).unwrap();
116    /// let handles = Handle::cache(&conn).unwrap();
117    /// let deduped_handles = Handle::dedupe(&handles);
118    /// ```
119    fn dedupe(duplicated_data: &HashMap<i32, Self::T>) -> HashMap<i32, i32> {
120        let mut deduplicated_participants: HashMap<i32, i32> = HashMap::new();
121        let mut participant_to_unique_participant_id: HashMap<Self::T, i32> = HashMap::new();
122
123        // Build cache of each unique set of participants to a new identifier:
124        let mut unique_participant_identifier = 0;
125
126        // Iterate over the values in a deterministic order
127        let mut sorted_dupes: Vec<(&i32, &Self::T)> = duplicated_data.iter().collect();
128        sorted_dupes.sort_by(|(a, _), (b, _)| a.cmp(b));
129
130        for (participant_id, participant) in sorted_dupes {
131            if let Some(id) = participant_to_unique_participant_id.get(participant) {
132                deduplicated_participants.insert(participant_id.to_owned(), id.to_owned());
133            } else {
134                participant_to_unique_participant_id
135                    .insert(participant.to_owned(), unique_participant_identifier);
136                deduplicated_participants
137                    .insert(participant_id.to_owned(), unique_participant_identifier);
138                unique_participant_identifier += 1;
139            }
140        }
141        deduplicated_participants
142    }
143}
144
145// MARK: Diagnostic
146impl Diagnostic for Handle {
147    /// Emit diagnostic data for the Handles table
148    ///
149    /// Get the number of handles that are duplicated
150    ///
151    /// The `person_centric_id` is used to map handles that represent the
152    /// same contact across ids (numbers, emails, etc) and across
153    /// services (iMessage, Jabber, iChat, SMS, etc)
154    ///
155    /// In some databases, `person_centric_id` may not be available.
156    ///
157    /// # Example:
158    ///
159    /// ```
160    /// use imessage_database::util::dirs::default_db_path;
161    /// use imessage_database::tables::table::{Diagnostic, get_connection};
162    /// use imessage_database::tables::handle::Handle;
163    ///
164    /// let db_path = default_db_path();
165    /// let conn = get_connection(&db_path).unwrap();
166    /// Handle::run_diagnostic(&conn);
167    /// ```
168    fn run_diagnostic(db: &Connection) -> Result<(), TableError> {
169        let query = concat!(
170            "SELECT COUNT(DISTINCT person_centric_id) ",
171            "FROM handle ",
172            "WHERE person_centric_id NOT NULL"
173        );
174
175        if let Ok(mut rows) = db.prepare(query) {
176            processing();
177
178            // Get number of handles with identical person_centric_ids
179            let handles_with_identical_ids: i32 = rows.query_row([], |r| r.get(0)).unwrap_or(0);
180
181            // Cache all handles
182            let all_handles = Self::cache(db)?;
183
184            // Deduplicate handles
185            let unique_handles = Self::dedupe(&all_handles);
186
187            // Calculate total duplicated handles
188            let total_dupes =
189                all_handles.len() - HashSet::<&i32>::from_iter(unique_handles.values()).len();
190
191            done_processing();
192
193            println!("Handle diagnostic data:");
194            println!("    Total handles: {}", all_handles.len());
195            if handles_with_identical_ids > 0 || total_dupes > 0 {
196                if handles_with_identical_ids > 0 {
197                    println!("    Handles with more than one ID: {handles_with_identical_ids}");
198                }
199                if total_dupes > 0 {
200                    println!("    Total duplicated handles: {total_dupes}");
201                }
202            }
203        }
204
205        Ok(())
206    }
207}
208
209// MARK: Impl
210impl Handle {
211    /// The handles table does not have a lot of information and can have many duplicate values.
212    ///
213    /// This method generates a hashmap of each separate item in this table to a combined string
214    /// that represents all of the copies, so any handle ID will always map to the same string
215    /// for a given chat participant
216    fn get_person_id_map(db: &Connection) -> Result<HashMap<i32, String>, TableError> {
217        let mut person_to_id: HashMap<String, BTreeSet<String>> = HashMap::new();
218        let mut row_to_id: HashMap<i32, String> = HashMap::new();
219        let mut row_data: Vec<(String, i32, String)> = vec![];
220
221        // Build query
222        let query = concat!(
223            "SELECT DISTINCT A.person_centric_id, A.rowid, A.id ",
224            "FROM handle A ",
225            "INNER JOIN handle B ON B.id = A.id ",
226            "WHERE A.person_centric_id NOT NULL ",
227            "ORDER BY A.person_centric_id",
228        );
229        let statement = db.prepare(query);
230
231        if let Ok(mut statement) = statement {
232            // Cache the results of the query in memory
233            let contacts = statement.query_map([], |row| {
234                let person_centric_id: String = row.get(0)?;
235                let rowid: i32 = row.get(1)?;
236                let id: String = row.get(2)?;
237                Ok((person_centric_id, rowid, id))
238            })?;
239
240            for contact in contacts {
241                row_data.push(contact?);
242            }
243
244            // First pass: generate a map of each person_centric_id to its matching ids
245            for contact in &row_data {
246                let (person_centric_id, _, id) = contact;
247                if let Some(set) = person_to_id.get_mut(person_centric_id) {
248                    set.insert(id.to_owned());
249                } else {
250                    let mut set = BTreeSet::new();
251                    set.insert(id.to_owned());
252                    person_to_id.insert(person_centric_id.to_owned(), set);
253                }
254            }
255
256            // Second pass: point each ROWID to the matching ids
257            for contact in &row_data {
258                let (person_centric_id, rowid, _) = contact;
259                let data_to_insert = match person_to_id.get_mut(person_centric_id) {
260                    Some(person) => person.iter().cloned().collect::<Vec<String>>().join(" "),
261                    None => panic!("Attempted to resolve contact with no person_centric_id!"),
262                };
263                row_to_id.insert(rowid.to_owned(), data_to_insert);
264            }
265        }
266
267        Ok(row_to_id)
268    }
269}
270
271// MARK: Tests
#[cfg(test)]
mod tests {
    use crate::tables::{handle::Handle, table::Deduplicate};
    use std::collections::{HashMap, HashSet};

    /// Six handles that resolve to three distinct contacts: A (x3), B (x2), C (x1).
    ///
    /// A fresh map is built on every call so that each caller gets its own
    /// independently constructed `HashMap`.
    fn sample_handles() -> HashMap<i32, String> {
        vec![(1, "A"), (2, "A"), (3, "A"), (4, "B"), (5, "B"), (6, "C")]
            .into_iter()
            .map(|(rowid, contact)| (rowid, String::from(contact)))
            .collect()
    }

    /// Run the deduplication on fresh sample data and return the result sorted by handle ID
    fn sorted_dedupe_run() -> Vec<(i32, i32)> {
        let mut pairs = Handle::dedupe(&sample_handles())
            .into_iter()
            .collect::<Vec<(i32, i32)>>();
        pairs.sort_unstable();
        pairs
    }

    #[test]
    fn test_can_dedupe() {
        let output = Handle::dedupe(&sample_handles());
        let distinct_ids: HashSet<i32> = output.values().copied().collect();
        assert_eq!(distinct_ids.len(), 3);
    }

    #[test]
    // Simulate 3 runs of the program and ensure that the order of the deduplicated contacts is stable
    fn test_same_values() {
        let output_1 = sorted_dedupe_run();
        let output_2 = sorted_dedupe_run();
        let output_3 = sorted_dedupe_run();

        assert_eq!(output_1, output_2);
        assert_eq!(output_1, output_3);
        assert_eq!(output_2, output_3);
    }
}