Skip to main content

imessage_database/tables/
handle.rs

1/*!
2 Handle table rows and handle deduplication helpers.
3*/
4
5use rusqlite::{CachedStatement, Connection, Result, Row};
6use std::collections::{BTreeSet, HashMap, HashSet};
7
8use crate::{
9    error::table::TableError,
10    tables::{
11        diagnostic::{HandleDiagnostic, column_exists, count_query},
12        table::{Cacheable, HANDLE, ME, Table},
13    },
14};
15
16// MARK: Handle
/// Row from the `handle` table.
///
/// This is a plain value type: it can be cloned, and two rows compare equal when all
/// three fields are equal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Handle {
    /// Handle row ID.
    pub rowid: i32,
    /// Phone number, email address, or service identifier.
    pub id: String,
    /// Contact identity used by Messages to group related handles.
    ///
    /// `None` when the value is absent or could not be read from the row.
    pub person_centric_id: Option<String>,
}
27
28// MARK: Table
29impl Table for Handle {
30    fn from_row(row: &Row) -> Result<Handle> {
31        Ok(Handle {
32            rowid: row.get("rowid")?,
33            id: row.get("id")?,
34            person_centric_id: row.get("person_centric_id").unwrap_or(None),
35        })
36    }
37
38    fn get(db: &'_ Connection) -> Result<CachedStatement<'_>, TableError> {
39        Ok(db.prepare_cached(&format!("SELECT * from {HANDLE}"))?)
40    }
41}
42
43// MARK: Cache
44impl Cacheable for Handle {
45    type K = i32;
46    type V = String;
47    /// Cache handle display strings by row ID.
48    ///
49    /// Handles that share `person_centric_id` map to the same combined string.
50    ///
51    /// # Example:
52    ///
53    /// ```no_run
54    /// use imessage_database::util::dirs::default_db_path;
55    /// use imessage_database::tables::table::{Cacheable, get_connection};
56    /// use imessage_database::tables::handle::Handle;
57    ///
58    /// let db_path = default_db_path();
59    /// let conn = get_connection(&db_path).unwrap();
60    /// let chatrooms = Handle::cache(&conn);
61    /// ```
62    fn cache(db: &Connection) -> Result<HashMap<Self::K, Self::V>, TableError> {
63        let mut map = HashMap::new();
64        // Handle ID 0 is self in group chats.
65        map.insert(0, ME.to_string());
66
67        // Create query
68        let mut statement = Handle::get(db)?;
69
70        // Iterate over the handles and update the map
71        for handle in Handle::rows(&mut statement, [])? {
72            let contact = handle?;
73            map.insert(contact.rowid, contact.id);
74        }
75
76        // Condense contacts that share person_centric_id so their IDs map to the same strings
77        let dupe_contacts = Handle::get_person_id_map(db)?;
78        for contact in dupe_contacts {
79            let (id, new) = contact;
80            map.insert(id, new);
81        }
82
83        Ok(map)
84    }
85}
86
87// MARK: Dedupe
88impl Handle {
89    /// Assign stable deduplicated IDs to handle display strings.
90    ///
91    /// Returns a map from real handle row ID to a sequential participant ID.
92    ///
93    /// Assuming no new handles have been written to the database, deduplicated data is deterministic across runs.
94    ///
95    /// # Example:
96    ///
97    /// ```no_run
98    /// use imessage_database::util::dirs::default_db_path;
99    /// use imessage_database::tables::table::{Cacheable, get_connection};
100    /// use imessage_database::tables::handle::Handle;
101    ///
102    /// let db_path = default_db_path();
103    /// let conn = get_connection(&db_path).unwrap();
104    /// let handles = Handle::cache(&conn).unwrap();
105    /// let deduped_handles = Handle::dedupe(&handles);
106    /// ```
107    pub fn dedupe(duplicated_data: &HashMap<i32, String>) -> HashMap<i32, i32> {
108        let mut deduplicated_participants: HashMap<i32, i32> = HashMap::new();
109        let mut participant_to_unique_participant_id: HashMap<String, i32> = HashMap::new();
110
111        // Build cache of each unique set of participants to a new identifier:
112        let mut unique_participant_identifier = 0;
113
114        // Deterministic iteration keeps deduplicated IDs stable across runs.
115        let mut sorted_dupes: Vec<(&i32, &String)> = duplicated_data.iter().collect();
116        sorted_dupes.sort_by_key(|(a, _)| *a);
117
118        for (participant_id, participant) in sorted_dupes {
119            if let Some(id) = participant_to_unique_participant_id.get(participant) {
120                deduplicated_participants.insert(*participant_id, *id);
121            } else {
122                participant_to_unique_participant_id
123                    .insert(participant.to_owned(), unique_participant_identifier);
124                deduplicated_participants.insert(*participant_id, unique_participant_identifier);
125                unique_participant_identifier += 1;
126            }
127        }
128        deduplicated_participants
129    }
130}
131
132// MARK: Diagnostic
133impl Handle {
134    /// Compute diagnostic data for the `handle` table.
135    ///
136    /// Counts the number of handles that are duplicated. The `person_centric_id`
137    /// is used to map handles that represent the same contact across ids (numbers,
138    /// emails, etc) and across services (iMessage, Jabber, iChat, SMS, etc).
139    ///
140    /// In some databases, `person_centric_id` may not be available.
141    ///
142    /// # Example:
143    ///
144    /// ```no_run
145    /// use imessage_database::util::dirs::default_db_path;
146    /// use imessage_database::tables::table::get_connection;
147    /// use imessage_database::tables::handle::Handle;
148    ///
149    /// let db_path = default_db_path();
150    /// let conn = get_connection(&db_path).unwrap();
151    /// Handle::run_diagnostic(&conn);
152    /// ```
153    pub fn run_diagnostic(db: &Connection) -> Result<HandleDiagnostic, TableError> {
154        let query = concat!(
155            "SELECT COUNT(DISTINCT person_centric_id) ",
156            "FROM handle ",
157            "WHERE person_centric_id NOT NULL"
158        );
159
160        let handles_with_multiple_ids = if column_exists(db, HANDLE, "person_centric_id")? {
161            Some(count_query(db, query)?)
162        } else {
163            None
164        };
165
166        // Cache all handles
167        let all_handles = Self::cache(db)?;
168
169        // Deduplicate handles
170        let unique_handles = Self::dedupe(&all_handles);
171
172        // Calculate total duplicated handles
173        let total_duplicated =
174            all_handles.len() - HashSet::<&i32>::from_iter(unique_handles.values()).len();
175
176        Ok(HandleDiagnostic {
177            total_handles: all_handles.len(),
178            handles_with_multiple_ids,
179            total_duplicated,
180        })
181    }
182}
183
184// MARK: Impl
185impl Handle {
186    /// Map handles sharing `person_centric_id` to a combined display string.
187    fn get_person_id_map(db: &Connection) -> Result<HashMap<i32, String>, TableError> {
188        let mut person_to_id: HashMap<String, BTreeSet<String>> = HashMap::new();
189        let mut row_to_id: HashMap<i32, String> = HashMap::new();
190        let mut row_data: Vec<(String, i32, String)> = vec![];
191
192        // Build query
193        let query = concat!(
194            "SELECT DISTINCT A.person_centric_id, A.rowid, A.id ",
195            "FROM handle A ",
196            "INNER JOIN handle B ON B.id = A.id ",
197            "WHERE A.person_centric_id NOT NULL ",
198            "ORDER BY A.person_centric_id",
199        );
200        let statement = db.prepare(query);
201
202        if let Ok(mut statement) = statement {
203            // Cache the results of the query in memory
204            let contacts = statement.query_map([], |row| {
205                let person_centric_id: String = row.get(0)?;
206                let rowid: i32 = row.get(1)?;
207                let id: String = row.get(2)?;
208                Ok((person_centric_id, rowid, id))
209            })?;
210
211            for contact in contacts {
212                row_data.push(contact?);
213            }
214
215            // First pass: group handle strings by `person_centric_id`.
216            for contact in &row_data {
217                let (person_centric_id, _, id) = contact;
218                if let Some(set) = person_to_id.get_mut(person_centric_id) {
219                    set.insert(id.to_owned());
220                } else {
221                    let mut set = BTreeSet::new();
222                    set.insert(id.to_owned());
223                    person_to_id.insert(person_centric_id.to_owned(), set);
224                }
225            }
226
227            // Second pass: map each row ID to its combined display string.
228            for contact in &row_data {
229                let (person_centric_id, rowid, _) = contact;
230                let data_to_insert = match person_to_id.get_mut(person_centric_id) {
231                    Some(person) => person.iter().cloned().collect::<Vec<String>>().join(" "),
232                    None => continue,
233                };
234                row_to_id.insert(rowid.to_owned(), data_to_insert);
235            }
236        }
237
238        Ok(row_to_id)
239    }
240}
241
242// MARK: Tests
#[cfg(test)]
mod tests {
    use crate::tables::handle::Handle;
    use rusqlite::Connection;
    use std::collections::{HashMap, HashSet};

    /// Six handle rows that collapse onto three distinct display strings.
    ///
    /// A fresh map is built on every call so each caller gets an independent instance.
    fn sample_handles() -> HashMap<i32, String> {
        HashMap::from([
            (1, String::from("A")), // 0
            (2, String::from("A")), // 0
            (3, String::from("A")), // 0
            (4, String::from("B")), // 1
            (5, String::from("B")), // 1
            (6, String::from("C")), // 2
        ])
    }

    /// Run `Handle::dedupe` and return its pairs sorted by row ID.
    fn sorted_dedupe(input: &HashMap<i32, String>) -> Vec<(i32, i32)> {
        let mut pairs: Vec<(i32, i32)> = Handle::dedupe(input).into_iter().collect();
        pairs.sort_unstable();
        pairs
    }

    #[test]
    fn test_can_dedupe() {
        let output = Handle::dedupe(&sample_handles());

        // Three distinct display strings must yield three distinct participant IDs.
        let distinct_ids: HashSet<i32> = output.values().copied().collect();
        assert_eq!(distinct_ids.len(), 3);

        // IDs are assigned from 0 in ascending row ID order, so the exact mapping is
        // known and is checked here rather than only described in comments.
        let expected: HashMap<i32, i32> =
            HashMap::from([(1, 0), (2, 0), (3, 0), (4, 1), (5, 1), (6, 2)]);
        assert_eq!(output, expected);
    }

    #[test]
    // Simulate 3 runs of the program and ensure that the order of the deduplicated contacts is stable
    fn test_same_values() {
        // Each run gets its own separately built input map.
        let output_1 = sorted_dedupe(&sample_handles());
        let output_2 = sorted_dedupe(&sample_handles());
        let output_3 = sorted_dedupe(&sample_handles());

        assert_eq!(output_1, output_2);
        assert_eq!(output_1, output_3);
        assert_eq!(output_2, output_3);
    }

    #[test]
    fn diagnostic_omits_person_centric_count_when_column_is_missing() {
        let db = Connection::open_in_memory().unwrap();
        db.execute(
            "CREATE TABLE handle (ROWID INTEGER PRIMARY KEY, id TEXT NOT NULL)",
            [],
        )
        .unwrap();
        db.execute(
            "INSERT INTO handle (ROWID, id) VALUES (1, 'first'), (2, 'second')",
            [],
        )
        .unwrap();

        let diagnostic = Handle::run_diagnostic(&db).unwrap();

        // Two table rows plus the entry the cache always adds for row ID 0.
        assert_eq!(diagnostic.total_handles, 3);
        assert_eq!(diagnostic.handles_with_multiple_ids, None);
    }

    #[test]
    fn diagnostic_counts_person_centric_ids_when_column_exists() {
        let db = Connection::open_in_memory().unwrap();
        db.execute(
            "CREATE TABLE handle (
                ROWID INTEGER PRIMARY KEY,
                id TEXT NOT NULL,
                person_centric_id TEXT
            )",
            [],
        )
        .unwrap();
        db.execute(
            "INSERT INTO handle (ROWID, id, person_centric_id)
             VALUES (1, 'first', 'person-1'),
                    (2, 'second', 'person-1'),
                    (3, 'third', 'person-2')",
            [],
        )
        .unwrap();

        let diagnostic = Handle::run_diagnostic(&db).unwrap();

        // Two distinct non-null `person_centric_id` values: person-1 and person-2.
        assert_eq!(diagnostic.handles_with_multiple_ids, Some(2));
    }
}