Skip to main content

imessage_database/tables/
handle.rs

1/*!
2 This module represents common (but not all) columns in the `handle` table.
3*/
4
5use rusqlite::{CachedStatement, Connection, Result, Row};
6use std::collections::{BTreeSet, HashMap, HashSet};
7
8use crate::{
9    error::table::TableError,
10    tables::{
11        diagnostic::HandleDiagnostic,
12        table::{Cacheable, HANDLE, ME, Table},
13    },
14};
15
16// MARK: Handle
/// Represents a single row in the `handle` table.
///
/// Only the columns this module works with are modelled here; the table itself
/// contains more.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Handle {
    /// The unique identifier for the handle in the database
    pub rowid: i32,
    /// Identifier for a contact, i.e. a phone number or email address
    pub id: String,
    /// Field used to disambiguate divergent handles that represent the same contact.
    ///
    /// `None` when the database does not provide a value for this handle.
    pub person_centric_id: Option<String>,
}
27
28// MARK: Table
29impl Table for Handle {
30    fn from_row(row: &Row) -> Result<Handle> {
31        Ok(Handle {
32            rowid: row.get("rowid")?,
33            id: row.get("id")?,
34            person_centric_id: row.get("person_centric_id").unwrap_or(None),
35        })
36    }
37
38    fn get(db: &'_ Connection) -> Result<CachedStatement<'_>, TableError> {
39        Ok(db.prepare_cached(&format!("SELECT * from {HANDLE}"))?)
40    }
41}
42
43// MARK: Cache
44impl Cacheable for Handle {
45    type K = i32;
46    type V = String;
47    /// Generate a `HashMap` for looking up contacts by their IDs, collapsing
48    /// duplicate contacts to the same ID String regardless of service
49    ///
50    /// # Example:
51    ///
52    /// ```no_run
53    /// use imessage_database::util::dirs::default_db_path;
54    /// use imessage_database::tables::table::{Cacheable, get_connection};
55    /// use imessage_database::tables::handle::Handle;
56    ///
57    /// let db_path = default_db_path();
58    /// let conn = get_connection(&db_path).unwrap();
59    /// let chatrooms = Handle::cache(&conn);
60    /// ```
61    fn cache(db: &Connection) -> Result<HashMap<Self::K, Self::V>, TableError> {
62        // Create cache for user IDs
63        let mut map = HashMap::new();
64        // Handle ID 0 is self in group chats
65        map.insert(0, ME.to_string());
66
67        // Create query
68        let mut statement = Handle::get(db)?;
69
70        // Iterate over the handles and update the map
71        for handle in Handle::rows(&mut statement, [])? {
72            let contact = handle?;
73            map.insert(contact.rowid, contact.id);
74        }
75
76        // Condense contacts that share person_centric_id so their IDs map to the same strings
77        let dupe_contacts = Handle::get_person_id_map(db)?;
78        for contact in dupe_contacts {
79            let (id, new) = contact;
80            map.insert(id, new);
81        }
82
83        // Done!
84        Ok(map)
85    }
86}
87
88// MARK: Dedupe
89impl Handle {
90    /// Given the initial set of duplicated handles, deduplicate them
91    ///
92    /// This returns a new hashmap that maps the real handle ID to a new deduplicated unique handle ID
93    /// that represents a single handle for all of the deduplicate handles.
94    ///
95    /// Assuming no new handles have been written to the database, deduplicated data is deterministic across runs.
96    ///
97    /// # Example:
98    ///
99    /// ```no_run
100    /// use imessage_database::util::dirs::default_db_path;
101    /// use imessage_database::tables::table::{Cacheable, get_connection};
102    /// use imessage_database::tables::handle::Handle;
103    ///
104    /// let db_path = default_db_path();
105    /// let conn = get_connection(&db_path).unwrap();
106    /// let handles = Handle::cache(&conn).unwrap();
107    /// let deduped_handles = Handle::dedupe(&handles);
108    /// ```
109    pub fn dedupe(duplicated_data: &HashMap<i32, String>) -> HashMap<i32, i32> {
110        let mut deduplicated_participants: HashMap<i32, i32> = HashMap::new();
111        let mut participant_to_unique_participant_id: HashMap<String, i32> = HashMap::new();
112
113        // Build cache of each unique set of participants to a new identifier:
114        let mut unique_participant_identifier = 0;
115
116        // Iterate over the values in a deterministic order
117        let mut sorted_dupes: Vec<(&i32, &String)> = duplicated_data.iter().collect();
118        sorted_dupes.sort_by_key(|(a, _)| *a);
119
120        for (participant_id, participant) in sorted_dupes {
121            if let Some(id) = participant_to_unique_participant_id.get(participant) {
122                deduplicated_participants.insert(*participant_id, *id);
123            } else {
124                participant_to_unique_participant_id
125                    .insert(participant.to_owned(), unique_participant_identifier);
126                deduplicated_participants.insert(*participant_id, unique_participant_identifier);
127                unique_participant_identifier += 1;
128            }
129        }
130        deduplicated_participants
131    }
132}
133
134// MARK: Diagnostic
135impl Handle {
136    /// Compute diagnostic data for the Handles table
137    ///
138    /// Counts the number of handles that are duplicated. The `person_centric_id`
139    /// is used to map handles that represent the same contact across ids (numbers,
140    /// emails, etc) and across services (iMessage, Jabber, iChat, SMS, etc).
141    ///
142    /// In some databases, `person_centric_id` may not be available.
143    ///
144    /// # Example:
145    ///
146    /// ```no_run
147    /// use imessage_database::util::dirs::default_db_path;
148    /// use imessage_database::tables::table::get_connection;
149    /// use imessage_database::tables::handle::Handle;
150    ///
151    /// let db_path = default_db_path();
152    /// let conn = get_connection(&db_path).unwrap();
153    /// Handle::run_diagnostic(&conn);
154    /// ```
155    pub fn run_diagnostic(db: &Connection) -> Result<HandleDiagnostic, TableError> {
156        let query = concat!(
157            "SELECT COUNT(DISTINCT person_centric_id) ",
158            "FROM handle ",
159            "WHERE person_centric_id NOT NULL"
160        );
161
162        let handles_with_multiple_ids = if let Ok(mut rows) = db.prepare(query) {
163            rows.query_row([], |r| r.get::<_, i64>(0))
164                .ok()
165                .and_then(|count| usize::try_from(count).ok())
166                .unwrap_or(0)
167        } else {
168            0
169        };
170
171        // Cache all handles
172        let all_handles = Self::cache(db)?;
173
174        // Deduplicate handles
175        let unique_handles = Self::dedupe(&all_handles);
176
177        // Calculate total duplicated handles
178        let total_duplicated =
179            all_handles.len() - HashSet::<&i32>::from_iter(unique_handles.values()).len();
180
181        Ok(HandleDiagnostic {
182            total_handles: all_handles.len(),
183            handles_with_multiple_ids,
184            total_duplicated,
185        })
186    }
187}
188
189// MARK: Impl
190impl Handle {
191    /// The handles table does not have a lot of information and can have many duplicate values.
192    ///
193    /// This method generates a hashmap of each separate item in this table to a combined string
194    /// that represents all of the copies, so any handle ID will always map to the same string
195    /// for a given chat participant
196    fn get_person_id_map(db: &Connection) -> Result<HashMap<i32, String>, TableError> {
197        let mut person_to_id: HashMap<String, BTreeSet<String>> = HashMap::new();
198        let mut row_to_id: HashMap<i32, String> = HashMap::new();
199        let mut row_data: Vec<(String, i32, String)> = vec![];
200
201        // Build query
202        let query = concat!(
203            "SELECT DISTINCT A.person_centric_id, A.rowid, A.id ",
204            "FROM handle A ",
205            "INNER JOIN handle B ON B.id = A.id ",
206            "WHERE A.person_centric_id NOT NULL ",
207            "ORDER BY A.person_centric_id",
208        );
209        let statement = db.prepare(query);
210
211        if let Ok(mut statement) = statement {
212            // Cache the results of the query in memory
213            let contacts = statement.query_map([], |row| {
214                let person_centric_id: String = row.get(0)?;
215                let rowid: i32 = row.get(1)?;
216                let id: String = row.get(2)?;
217                Ok((person_centric_id, rowid, id))
218            })?;
219
220            for contact in contacts {
221                row_data.push(contact?);
222            }
223
224            // First pass: generate a map of each person_centric_id to its matching ids
225            for contact in &row_data {
226                let (person_centric_id, _, id) = contact;
227                if let Some(set) = person_to_id.get_mut(person_centric_id) {
228                    set.insert(id.to_owned());
229                } else {
230                    let mut set = BTreeSet::new();
231                    set.insert(id.to_owned());
232                    person_to_id.insert(person_centric_id.to_owned(), set);
233                }
234            }
235
236            // Second pass: point each ROWID to the matching ids
237            for contact in &row_data {
238                let (person_centric_id, rowid, _) = contact;
239                let data_to_insert = match person_to_id.get_mut(person_centric_id) {
240                    Some(person) => person.iter().cloned().collect::<Vec<String>>().join(" "),
241                    None => continue,
242                };
243                row_to_id.insert(rowid.to_owned(), data_to_insert);
244            }
245        }
246
247        Ok(row_to_id)
248    }
249}
250
251// MARK: Tests
#[cfg(test)]
mod tests {
    use crate::tables::handle::Handle;
    use std::collections::{HashMap, HashSet};

    /// Six handles that collapse into three contacts:
    /// rows 1-3 are "A", rows 4-5 are "B", and row 6 is "C".
    ///
    /// Every call builds a fresh `HashMap`, so each one has its own iteration
    /// order; this is what lets the tests exercise the ordering guarantee.
    fn sample_handles() -> HashMap<i32, String> {
        [(1, "A"), (2, "A"), (3, "A"), (4, "B"), (5, "B"), (6, "C")]
            .iter()
            .map(|&(rowid, contact)| (rowid, String::from(contact)))
            .collect()
    }

    #[test]
    fn test_can_dedupe() {
        let output = Handle::dedupe(&sample_handles());

        // Three distinct contacts must yield exactly three distinct identifiers
        let deduped_ids: HashSet<i32> = output.values().copied().collect();
        assert_eq!(deduped_ids.len(), 3);

        // Identifiers are assigned in ascending row ID order, starting at zero
        let expected: HashMap<i32, i32> = [(1, 0), (2, 0), (3, 0), (4, 1), (5, 1), (6, 2)]
            .iter()
            .copied()
            .collect();
        assert_eq!(output, expected);
    }

    #[test]
    // Simulate 3 runs of the program and ensure that the order of the deduplicated contacts is stable
    fn test_same_values() {
        let runs: Vec<Vec<(i32, i32)>> = (0..3)
            .map(|_| {
                let mut output: Vec<(i32, i32)> =
                    Handle::dedupe(&sample_handles()).into_iter().collect();
                output.sort_unstable();
                output
            })
            .collect();

        assert_eq!(runs[0], runs[1]);
        assert_eq!(runs[0], runs[2]);
        assert_eq!(runs[1], runs[2]);
    }
}