imessage_database/tables/
chat_handle.rs

1/*!
2 This module represents the chat to handle join table.
3*/
4
5use std::collections::{BTreeSet, HashMap, HashSet};
6
7use crate::{
8    error::table::TableError,
9    tables::table::{
10        Cacheable, Deduplicate, Diagnostic, Table, CHAT_HANDLE_JOIN, CHAT_MESSAGE_JOIN,
11    },
12    util::output::{done_processing, processing},
13};
14use rusqlite::{Connection, Error, Result, Row, Statement};
15
/// Represents a single row in the `chat_handle_join` table.
///
/// Each row links one chat to one handle. A chat with several participants
/// appears as several rows that share the same `chat_id`.
#[derive(Debug)]
pub struct ChatToHandle {
    /// Value of the row's `chat_id` column
    chat_id: i32,
    /// Value of the row's `handle_id` column
    handle_id: i32,
}
21
22impl Table for ChatToHandle {
23    fn from_row(row: &Row) -> Result<ChatToHandle> {
24        Ok(ChatToHandle {
25            chat_id: row.get("chat_id")?,
26            handle_id: row.get("handle_id")?,
27        })
28    }
29
30    fn get(db: &Connection) -> Result<Statement, TableError> {
31        db.prepare(&format!("SELECT * FROM {CHAT_HANDLE_JOIN}"))
32            .map_err(TableError::ChatToHandle)
33    }
34
35    fn extract(chat_to_handle: Result<Result<Self, Error>, Error>) -> Result<Self, TableError> {
36        match chat_to_handle {
37            Ok(Ok(chat_to_handle)) => Ok(chat_to_handle),
38            Err(why) | Ok(Err(why)) => Err(TableError::ChatToHandle(why)),
39        }
40    }
41}
42
43impl Cacheable for ChatToHandle {
44    type K = i32;
45    type V = BTreeSet<i32>;
46    /// Generate a hashmap containing each chatroom's ID pointing to a `HashSet` of participant handle IDs
47    ///
48    /// # Example:
49    ///
50    /// ```
51    /// use imessage_database::util::dirs::default_db_path;
52    /// use imessage_database::tables::table::{Cacheable, get_connection};
53    /// use imessage_database::tables::chat_handle::ChatToHandle;
54    ///
55    /// let db_path = default_db_path();
56    /// let conn = get_connection(&db_path).unwrap();
57    /// let chatrooms = ChatToHandle::cache(&conn);
58    /// ```
59    fn cache(db: &Connection) -> Result<HashMap<Self::K, Self::V>, TableError> {
60        let mut cache: HashMap<i32, BTreeSet<i32>> = HashMap::new();
61
62        let mut rows = ChatToHandle::get(db)?;
63        let mappings = rows
64            .query_map([], |row| Ok(ChatToHandle::from_row(row)))
65            .map_err(TableError::ChatToHandle)?;
66
67        for mapping in mappings {
68            let joiner = ChatToHandle::extract(mapping)?;
69            if let Some(handles) = cache.get_mut(&joiner.chat_id) {
70                handles.insert(joiner.handle_id);
71            } else {
72                let mut data_to_cache = BTreeSet::new();
73                data_to_cache.insert(joiner.handle_id);
74                cache.insert(joiner.chat_id, data_to_cache);
75            }
76        }
77
78        Ok(cache)
79    }
80}
81
82impl Deduplicate for ChatToHandle {
83    type T = BTreeSet<i32>;
84
85    /// Given the initial set of duplicated chats, deduplicate them based on the participants
86    ///
87    /// This returns a new hashmap that maps the real chat ID to a new deduplicated unique chat ID
88    /// that represents a single chat for all of the same participants, even if they have multiple handles.
89    ///
90    /// Assuming no new chat-handle relationships have been written to the database, deduplicated data is deterministic across runs.
91    /// 
92    /// # Example:
93    ///
94    /// ```
95    /// use imessage_database::util::dirs::default_db_path;
96    /// use imessage_database::tables::table::{Cacheable, Deduplicate, get_connection};
97    /// use imessage_database::tables::chat_handle::ChatToHandle;
98    ///
99    /// let db_path = default_db_path();
100    /// let conn = get_connection(&db_path).unwrap();
101    /// let chatrooms = ChatToHandle::cache(&conn).unwrap();
102    /// let deduped_chatrooms = ChatToHandle::dedupe(&chatrooms);
103    /// ```
104    fn dedupe(duplicated_data: &HashMap<i32, Self::T>) -> HashMap<i32, i32> {
105        let mut deduplicated_chats: HashMap<i32, i32> = HashMap::new();
106        let mut participants_to_unique_chat_id: HashMap<Self::T, i32> = HashMap::new();
107
108        // Build cache of each unique set of participants to a new identifier
109        let mut unique_chat_identifier = 0;
110
111        // Iterate over the values in a deterministic order
112        let mut sorted_dupes: Vec<(&i32, &Self::T)> = duplicated_data.iter().collect();
113        sorted_dupes.sort_by(|(a, _), (b, _)| a.cmp(b));
114
115        for (chat_id, participants) in sorted_dupes {
116            if let Some(id) = participants_to_unique_chat_id.get(participants) {
117                deduplicated_chats.insert(chat_id.to_owned(), id.to_owned());
118            } else {
119                participants_to_unique_chat_id
120                    .insert(participants.to_owned(), unique_chat_identifier);
121                deduplicated_chats.insert(chat_id.to_owned(), unique_chat_identifier);
122                unique_chat_identifier += 1;
123            }
124        }
125        deduplicated_chats
126    }
127}
128
129impl Diagnostic for ChatToHandle {
130    /// Emit diagnostic data for the Chat to Handle join table
131    ///
132    /// Get the number of chats referenced in the messages table
133    /// that do not exist in this join table:
134    /// # Example:
135    ///
136    /// ```
137    /// use imessage_database::util::dirs::default_db_path;
138    /// use imessage_database::tables::table::{Diagnostic, get_connection};
139    /// use imessage_database::tables::chat_handle::ChatToHandle;
140    ///
141    /// let db_path = default_db_path();
142    /// let conn = get_connection(&db_path).unwrap();
143    /// ChatToHandle::run_diagnostic(&conn);
144    /// ```
145    fn run_diagnostic(db: &Connection) -> Result<(), TableError> {
146        processing();
147
148        // Get the Chat IDs that are associated with messages
149        let mut statement_message_chats = db
150            .prepare(&format!("SELECT DISTINCT chat_id from {CHAT_MESSAGE_JOIN}"))
151            .map_err(TableError::ChatToHandle)?;
152        let statement_message_chat_rows = statement_message_chats
153            .query_map([], |row: &Row| -> Result<i32> { row.get(0) })
154            .map_err(TableError::ChatToHandle)?;
155        let mut unique_chats_from_messages: HashSet<i32> = HashSet::new();
156        statement_message_chat_rows.into_iter().for_each(|row| {
157            if let Ok(row) = row {
158                unique_chats_from_messages.insert(row);
159            }
160        });
161
162        // Get the Chat IDs that are associated with handles
163        let mut statement_handle_chats = db
164            .prepare(&format!("SELECT DISTINCT chat_id from {CHAT_HANDLE_JOIN}"))
165            .map_err(TableError::ChatToHandle)?;
166        let statement_handle_chat_rows = statement_handle_chats
167            .query_map([], |row: &Row| -> Result<i32> { row.get(0) })
168            .map_err(TableError::ChatToHandle)?;
169        let mut unique_chats_from_handles: HashSet<i32> = HashSet::new();
170        statement_handle_chat_rows.into_iter().for_each(|row| {
171            if let Ok(row) = row {
172                unique_chats_from_handles.insert(row);
173            }
174        });
175
176        done_processing();
177
178        // Find the set difference and emit
179        let chats_with_no_handles = unique_chats_from_messages
180            .difference(&unique_chats_from_handles)
181            .count();
182        if chats_with_no_handles > 0 {
183            println!("Thread diagnostic data:");
184            println!("    Chats with no handles: {chats_with_no_handles:?}");
185        }
186
187        Ok(())
188    }
189}
190
#[cfg(test)]
mod tests {
    use crate::tables::{chat_handle::ChatToHandle, table::Deduplicate};
    use std::collections::{BTreeSet, HashMap, HashSet};

    /// Six chats whose participants are a single handle each; the trailing
    /// comments give the deduplicated ID each chat is expected to receive
    fn single_participant_chats() -> HashMap<i32, BTreeSet<i32>> {
        HashMap::from([
            (1, BTreeSet::from([1])), // 0
            (2, BTreeSet::from([1])), // 0
            (3, BTreeSet::from([1])), // 0
            (4, BTreeSet::from([2])), // 1
            (5, BTreeSet::from([2])), // 1
            (6, BTreeSet::from([3])), // 2
        ])
    }

    /// Turn a deduplicated mapping into a sorted list of `(chat_id, unique_id)`
    /// pairs so it can be compared without depending on hash iteration order
    fn sorted_pairs(mapping: HashMap<i32, i32>) -> Vec<(i32, i32)> {
        let mut pairs: Vec<(i32, i32)> = mapping.into_iter().collect();
        pairs.sort_unstable();
        pairs
    }

    #[test]
    fn can_dedupe() {
        let input = single_participant_chats();

        let output = ChatToHandle::dedupe(&input);
        let expected_deduped_ids: HashSet<i32> = output.values().copied().collect();
        assert_eq!(expected_deduped_ids.len(), 3);

        // The count alone would not catch chats being grouped incorrectly
        assert_eq!(
            sorted_pairs(output),
            vec![(1, 0), (2, 0), (3, 0), (4, 1), (5, 1), (6, 2)]
        );
    }

    #[test]
    fn can_dedupe_multi() {
        let input: HashMap<i32, BTreeSet<i32>> = HashMap::from([
            (1, BTreeSet::from([1, 2])), // 0
            (2, BTreeSet::from([1])),    // 1
            (3, BTreeSet::from([1])),    // 1
            (4, BTreeSet::from([2, 1])), // 0
            (5, BTreeSet::from([2, 3])), // 2
            (6, BTreeSet::from([3])),    // 3
        ]);

        let output = ChatToHandle::dedupe(&input);
        let expected_deduped_ids: HashSet<i32> = output.values().copied().collect();
        assert_eq!(expected_deduped_ids.len(), 4);

        // The order in which participants are listed must not affect the grouping
        assert_eq!(
            sorted_pairs(output),
            vec![(1, 0), (2, 1), (3, 1), (4, 0), (5, 2), (6, 3)]
        );
    }

    #[test]
    fn can_dedupe_empty() {
        let input: HashMap<i32, BTreeSet<i32>> = HashMap::new();
        assert!(ChatToHandle::dedupe(&input).is_empty());
    }

    #[test]
    // Simulate 3 runs of the program and ensure that the order of the deduplicated contacts is stable
    fn test_same_values() {
        // Each call builds a separate `HashMap`, so the three inputs need not share an iteration order
        let output_1 = sorted_pairs(ChatToHandle::dedupe(&single_participant_chats()));
        let output_2 = sorted_pairs(ChatToHandle::dedupe(&single_participant_chats()));
        let output_3 = sorted_pairs(ChatToHandle::dedupe(&single_participant_chats()));

        assert_eq!(output_1, output_2);
        assert_eq!(output_1, output_3);
        assert_eq!(output_2, output_3);
    }
}