imessage_database/tables/
handle.rs

1/*!
2 This module represents common (but not all) columns in the `handle` table.
3*/
4
5use rusqlite::{Connection, Error, Result, Row, Statement};
6use std::collections::{BTreeSet, HashMap};
7
8use crate::{
9    error::table::TableError,
10    tables::table::{Cacheable, Deduplicate, Diagnostic, Table, HANDLE, ME},
11    util::output::{done_processing, processing},
12};
13
/// Represents a single row in the `handle` table.
///
/// Each row describes one address (for example a phone number or an email address)
/// through which a contact can be reached.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Handle {
    /// Value of the row's `rowid` column
    pub rowid: i32,
    /// Identifier for a contact, e.g. a phone number or email address
    pub id: String,
    /// Field used to disambiguate divergent handles that represent the same contact;
    /// `None` when no value could be read for the row
    pub person_centric_id: Option<String>,
}
23
24impl Table for Handle {
25    fn from_row(row: &Row) -> Result<Handle> {
26        Ok(Handle {
27            rowid: row.get("rowid")?,
28            id: row.get("id")?,
29            person_centric_id: row.get("person_centric_id").unwrap_or(None),
30        })
31    }
32
33    fn get(db: &Connection) -> Result<Statement, TableError> {
34        db.prepare(&format!("SELECT * from {HANDLE}"))
35            .map_err(TableError::Handle)
36    }
37
38    fn extract(handle: Result<Result<Self, Error>, Error>) -> Result<Self, TableError> {
39        match handle {
40            Ok(Ok(handle)) => Ok(handle),
41            Err(why) | Ok(Err(why)) => Err(TableError::Handle(why)),
42        }
43    }
44}
45
46impl Cacheable for Handle {
47    type K = i32;
48    type V = String;
49    /// Generate a `HashMap` for looking up contacts by their IDs, collapsing
50    /// duplicate contacts to the same ID String regardless of service
51    ///
52    /// # Example:
53    ///
54    /// ```
55    /// use imessage_database::util::dirs::default_db_path;
56    /// use imessage_database::tables::table::{Cacheable, get_connection};
57    /// use imessage_database::tables::handle::Handle;
58    ///
59    /// let db_path = default_db_path();
60    /// let conn = get_connection(&db_path).unwrap();
61    /// let chatrooms = Handle::cache(&conn);
62    /// ```
63    fn cache(db: &Connection) -> Result<HashMap<Self::K, Self::V>, TableError> {
64        // Create cache for user IDs
65        let mut map = HashMap::new();
66        // Handle ID 0 is self in group chats
67        map.insert(0, ME.to_string());
68
69        // Create query
70        let mut statement = Handle::get(db)?;
71
72        // Execute query to build the Handles
73        let handles = statement
74            .query_map([], |row| Ok(Handle::from_row(row)))
75            .map_err(TableError::Handle)?;
76
77        // Iterate over the handles and update the map
78        for handle in handles {
79            let contact = Handle::extract(handle)?;
80            map.insert(contact.rowid, contact.id);
81        }
82
83        // Condense contacts that share person_centric_id so their IDs map to the same strings
84        let dupe_contacts = Handle::get_person_id_map(db)?;
85        for contact in dupe_contacts {
86            let (id, new) = contact;
87            map.insert(id, new);
88        }
89
90        // Done!
91        Ok(map)
92    }
93}
94
95impl Deduplicate for Handle {
96    type T = String;
97
98    /// Given the initial set of duplicated handles, deduplicate them
99    ///
100    /// This returns a new hashmap that maps the real handle ID to a new deduplicated unique handle ID
101    /// that represents a single handle for all of the deduplicate handles.
102    ///
103    /// Assuming no new handles have been written to the database, deduplicated data is deterministic across runs.
104    /// 
105    /// # Example:
106    ///
107    /// ```
108    /// use imessage_database::util::dirs::default_db_path;
109    /// use imessage_database::tables::table::{Cacheable, Deduplicate, get_connection};
110    /// use imessage_database::tables::handle::Handle;
111    ///
112    /// let db_path = default_db_path();
113    /// let conn = get_connection(&db_path).unwrap();
114    /// let chatrooms = Handle::cache(&conn).unwrap();
115    /// let deduped_chatrooms = Handle::dedupe(&chatrooms);
116    /// ```
117    fn dedupe(duplicated_data: &HashMap<i32, Self::T>) -> HashMap<i32, i32> {
118        let mut deduplicated_participants: HashMap<i32, i32> = HashMap::new();
119        let mut participant_to_unique_participant_id: HashMap<Self::T, i32> = HashMap::new();
120
121        // Build cache of each unique set of participants to a new identifier:
122        let mut unique_participant_identifier = 0;
123
124        // Iterate over the values in a deterministic order
125        let mut sorted_dupes: Vec<(&i32, &Self::T)> = duplicated_data.iter().collect();
126        sorted_dupes.sort_by(|(a, _), (b, _)| a.cmp(b));
127
128        for (participant_id, participant) in sorted_dupes {
129            if let Some(id) = participant_to_unique_participant_id.get(participant) {
130                deduplicated_participants.insert(participant_id.to_owned(), id.to_owned());
131            } else {
132                participant_to_unique_participant_id
133                    .insert(participant.to_owned(), unique_participant_identifier);
134                deduplicated_participants
135                    .insert(participant_id.to_owned(), unique_participant_identifier);
136                unique_participant_identifier += 1;
137            }
138        }
139        deduplicated_participants
140    }
141}
142
143impl Diagnostic for Handle {
144    /// Emit diagnostic data for the Handles table
145    ///
146    /// Get the number of handles that are duplicated
147    ///
148    /// The `person_centric_id` is used to map handles that represent the
149    /// same contact across ids (numbers, emails, etc) and across
150    /// services (iMessage, Jabber, iChat, SMS, etc)
151    ///
152    /// In some databases, `person_centric_id` may not be available.
153    ///
154    /// # Example:
155    ///
156    /// ```
157    /// use imessage_database::util::dirs::default_db_path;
158    /// use imessage_database::tables::table::{Diagnostic, get_connection};
159    /// use imessage_database::tables::handle::Handle;
160    ///
161    /// let db_path = default_db_path();
162    /// let conn = get_connection(&db_path).unwrap();
163    /// Handle::run_diagnostic(&conn);
164    /// ```
165    fn run_diagnostic(db: &Connection) -> Result<(), TableError> {
166        let query = concat!(
167            "SELECT COUNT(DISTINCT person_centric_id) ",
168            "FROM handle ",
169            "WHERE person_centric_id NOT NULL"
170        );
171
172        if let Ok(mut rows) = db.prepare(query).map_err(TableError::Handle) {
173            processing();
174
175            let count_dupes: Option<i32> = rows
176                .query_row([], |r| r.get(0))
177                .map_err(TableError::Handle)?;
178
179            done_processing();
180
181            if let Some(dupes) = count_dupes {
182                if dupes > 0 {
183                    println!("Handle diagnostic data:");
184                    println!("    Contacts with more than one ID: {dupes}");
185                }
186            }
187        }
188
189        Ok(())
190    }
191}
192
193impl Handle {
194    /// The handles table does not have a lot of information and can have many duplicate values.
195    ///
196    /// This method generates a hashmap of each separate item in this table to a combined string
197    /// that represents all of the copies, so any handle ID will always map to the same string
198    /// for a given chat participant
199    fn get_person_id_map(db: &Connection) -> Result<HashMap<i32, String>, TableError> {
200        let mut person_to_id: HashMap<String, BTreeSet<String>> = HashMap::new();
201        let mut row_to_id: HashMap<i32, String> = HashMap::new();
202        let mut row_data: Vec<(String, i32, String)> = vec![];
203
204        // Build query
205        let query = concat!(
206            "SELECT DISTINCT A.person_centric_id, A.rowid, A.id ",
207            "FROM handle A ",
208            "INNER JOIN handle B ON B.id = A.id ",
209            "WHERE A.person_centric_id NOT NULL ",
210            "ORDER BY A.person_centric_id",
211        );
212        let statement = db.prepare(query);
213
214        if let Ok(mut statement) = statement {
215            // Cache the results of the query in memory
216            let contacts = statement
217                .query_map([], |row| {
218                    let person_centric_id: String = row.get(0)?;
219                    let rowid: i32 = row.get(1)?;
220                    let id: String = row.get(2)?;
221                    Ok((person_centric_id, rowid, id))
222                })
223                .map_err(TableError::Handle)?;
224
225            for contact in contacts {
226                match contact {
227                    Ok(tup) => {
228                        row_data.push(tup);
229                    }
230                    Err(why) => return Err(TableError::Handle(why)),
231                }
232            }
233
234            // First pass: generate a map of each person_centric_id to its matching ids
235            for contact in &row_data {
236                let (person_centric_id, _, id) = contact;
237                if let Some(set) = person_to_id.get_mut(person_centric_id) {
238                    set.insert(id.to_owned());
239                } else {
240                    let mut set = BTreeSet::new();
241                    set.insert(id.to_owned());
242                    person_to_id.insert(person_centric_id.to_owned(), set);
243                }
244            }
245
246            // Second pass: point each ROWID to the matching ids
247            for contact in &row_data {
248                let (person_centric_id, rowid, _) = contact;
249                let data_to_insert = match person_to_id.get_mut(person_centric_id) {
250                    Some(person) => person.iter().cloned().collect::<Vec<String>>().join(" "),
251                    None => panic!("Attempted to resolve contact with no person_centric_id!"),
252                };
253                row_to_id.insert(rowid.to_owned(), data_to_insert);
254            }
255        }
256
257        Ok(row_to_id)
258    }
259}
260
#[cfg(test)]
mod tests {
    use crate::tables::{handle::Handle, table::Deduplicate};
    use std::collections::{HashMap, HashSet};

    /// Six handle IDs spread over three distinct contacts: A (1, 2, 3), B (4, 5) and C (6)
    fn sample_handles() -> HashMap<i32, String> {
        [(1, "A"), (2, "A"), (3, "A"), (4, "B"), (5, "B"), (6, "C")]
            .iter()
            .map(|&(rowid, contact)| (rowid, String::from(contact)))
            .collect()
    }

    /// Deduplicate a freshly built sample and return the mapping as a sorted list of pairs
    fn sorted_dedupe_run() -> Vec<(i32, i32)> {
        let mut pairs = Handle::dedupe(&sample_handles())
            .into_iter()
            .collect::<Vec<(i32, i32)>>();
        pairs.sort();
        pairs
    }

    // Three distinct contacts must collapse to exactly three deduplicated identifiers
    #[test]
    fn test_can_dedupe() {
        let output = Handle::dedupe(&sample_handles());
        let distinct_deduped_ids: HashSet<i32> = output.values().copied().collect();
        assert_eq!(distinct_deduped_ids.len(), 3);
    }

    // Simulate 3 runs of the program and ensure that the order of the deduplicated contacts is stable
    #[test]
    fn test_same_values() {
        let output_1 = sorted_dedupe_run();
        let output_2 = sorted_dedupe_run();
        let output_3 = sorted_dedupe_run();

        assert_eq!(output_1, output_2);
        assert_eq!(output_1, output_3);
        assert_eq!(output_2, output_3);
    }
}