imessage_database/tables/
chat_handle.rs

1/*!
 This module represents the chat-to-handle join table.
3*/
4
5use std::collections::{BTreeSet, HashMap, HashSet};
6
7use crate::{
8    error::table::TableError,
9    tables::table::{
10        CHAT_HANDLE_JOIN, CHAT_MESSAGE_JOIN, Cacheable, Deduplicate, Diagnostic, Table,
11    },
12    util::output::{done_processing, processing},
13};
14use rusqlite::{Connection, Error, Result, Row, Statement};
15
/// Represents a single row in the `chat_handle_join` table.
///
/// Each row links one chat to one of the handles that participates in it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChatToHandle {
    /// Value of the row's `chat_id` column
    chat_id: i32,
    /// Value of the row's `handle_id` column
    handle_id: i32,
}
21
22impl Table for ChatToHandle {
23    fn from_row(row: &Row) -> Result<ChatToHandle> {
24        Ok(ChatToHandle {
25            chat_id: row.get("chat_id")?,
26            handle_id: row.get("handle_id")?,
27        })
28    }
29
30    fn get(db: &Connection) -> Result<Statement, TableError> {
31        Ok(db.prepare(&format!("SELECT * FROM {CHAT_HANDLE_JOIN}"))?)
32    }
33
34    fn extract(chat_to_handle: Result<Result<Self, Error>, Error>) -> Result<Self, TableError> {
35        match chat_to_handle {
36            Ok(Ok(chat_to_handle)) => Ok(chat_to_handle),
37            Err(why) | Ok(Err(why)) => Err(TableError::QueryError(why)),
38        }
39    }
40}
41
42impl Cacheable for ChatToHandle {
43    type K = i32;
44    type V = BTreeSet<i32>;
45    /// Generate a hashmap containing each chatroom's ID pointing to a `HashSet` of participant handle IDs
46    ///
47    /// # Example:
48    ///
49    /// ```
50    /// use imessage_database::util::dirs::default_db_path;
51    /// use imessage_database::tables::table::{Cacheable, get_connection};
52    /// use imessage_database::tables::chat_handle::ChatToHandle;
53    ///
54    /// let db_path = default_db_path();
55    /// let conn = get_connection(&db_path).unwrap();
56    /// let chatrooms = ChatToHandle::cache(&conn);
57    /// ```
58    fn cache(db: &Connection) -> Result<HashMap<Self::K, Self::V>, TableError> {
59        let mut cache: HashMap<i32, BTreeSet<i32>> = HashMap::new();
60
61        let mut rows = ChatToHandle::get(db)?;
62        let mappings = rows.query_map([], |row| Ok(ChatToHandle::from_row(row)))?;
63
64        for mapping in mappings {
65            let joiner = ChatToHandle::extract(mapping)?;
66            if let Some(handles) = cache.get_mut(&joiner.chat_id) {
67                handles.insert(joiner.handle_id);
68            } else {
69                let mut data_to_cache = BTreeSet::new();
70                data_to_cache.insert(joiner.handle_id);
71                cache.insert(joiner.chat_id, data_to_cache);
72            }
73        }
74
75        Ok(cache)
76    }
77}
78
79impl Deduplicate for ChatToHandle {
80    type T = BTreeSet<i32>;
81
82    /// Given the initial set of duplicated chats, deduplicate them based on the participants
83    ///
84    /// This returns a new hashmap that maps the real chat ID to a new deduplicated unique chat ID
85    /// that represents a single chat for all of the same participants, even if they have multiple handles.
86    ///
87    /// Assuming no new chat-handle relationships have been written to the database, deduplicated data is deterministic across runs.
88    ///
89    /// # Example:
90    ///
91    /// ```
92    /// use imessage_database::util::dirs::default_db_path;
93    /// use imessage_database::tables::table::{Cacheable, Deduplicate, get_connection};
94    /// use imessage_database::tables::chat_handle::ChatToHandle;
95    ///
96    /// let db_path = default_db_path();
97    /// let conn = get_connection(&db_path).unwrap();
98    /// let chatrooms = ChatToHandle::cache(&conn).unwrap();
99    /// let deduped_chatrooms = ChatToHandle::dedupe(&chatrooms);
100    /// ```
101    fn dedupe(duplicated_data: &HashMap<i32, Self::T>) -> HashMap<i32, i32> {
102        let mut deduplicated_chats: HashMap<i32, i32> = HashMap::new();
103        let mut participants_to_unique_chat_id: HashMap<Self::T, i32> = HashMap::new();
104
105        // Build cache of each unique set of participants to a new identifier
106        let mut unique_chat_identifier = 0;
107
108        // Iterate over the values in a deterministic order
109        let mut sorted_dupes: Vec<(&i32, &Self::T)> = duplicated_data.iter().collect();
110        sorted_dupes.sort_by(|(a, _), (b, _)| a.cmp(b));
111
112        for (chat_id, participants) in sorted_dupes {
113            if let Some(id) = participants_to_unique_chat_id.get(participants) {
114                deduplicated_chats.insert(chat_id.to_owned(), id.to_owned());
115            } else {
116                participants_to_unique_chat_id
117                    .insert(participants.to_owned(), unique_chat_identifier);
118                deduplicated_chats.insert(chat_id.to_owned(), unique_chat_identifier);
119                unique_chat_identifier += 1;
120            }
121        }
122        deduplicated_chats
123    }
124}
125
126impl Diagnostic for ChatToHandle {
127    /// Emit diagnostic data for the Chat to Handle join table
128    ///
129    /// Get the number of chats referenced in the messages table
130    /// that do not exist in this join table:
131    /// # Example:
132    ///
133    /// ```
134    /// use imessage_database::util::dirs::default_db_path;
135    /// use imessage_database::tables::table::{Diagnostic, get_connection};
136    /// use imessage_database::tables::chat_handle::ChatToHandle;
137    ///
138    /// let db_path = default_db_path();
139    /// let conn = get_connection(&db_path).unwrap();
140    /// ChatToHandle::run_diagnostic(&conn);
141    /// ```
142    fn run_diagnostic(db: &Connection) -> Result<(), TableError> {
143        processing();
144
145        // Get the Chat IDs that are associated with messages
146        let mut statement_message_chats =
147            db.prepare(&format!("SELECT DISTINCT chat_id from {CHAT_MESSAGE_JOIN}"))?;
148        let statement_message_chat_rows =
149            statement_message_chats.query_map([], |row: &Row| -> Result<i32> { row.get(0) })?;
150        let mut unique_chats_from_messages: HashSet<i32> = HashSet::new();
151        statement_message_chat_rows.into_iter().for_each(|row| {
152            if let Ok(row) = row {
153                unique_chats_from_messages.insert(row);
154            }
155        });
156
157        // Get the Chat IDs that are associated with handles
158        let mut statement_handle_chats =
159            db.prepare(&format!("SELECT DISTINCT chat_id from {CHAT_HANDLE_JOIN}"))?;
160        let statement_handle_chat_rows =
161            statement_handle_chats.query_map([], |row: &Row| -> Result<i32> { row.get(0) })?;
162        let mut unique_chats_from_handles: HashSet<i32> = HashSet::new();
163        statement_handle_chat_rows.into_iter().for_each(|row| {
164            if let Ok(row) = row {
165                unique_chats_from_handles.insert(row);
166            }
167        });
168
169        done_processing();
170
171        // Find the set difference and emit
172        let chats_with_no_handles = unique_chats_from_messages
173            .difference(&unique_chats_from_handles)
174            .count();
175        if chats_with_no_handles > 0 {
176            println!("Thread diagnostic data:");
177            println!("    Chats with no handles: {chats_with_no_handles:?}");
178        }
179
180        Ok(())
181    }
182}
183
#[cfg(test)]
mod tests {
    use crate::tables::{chat_handle::ChatToHandle, table::Deduplicate};
    use std::collections::{BTreeSet, HashMap, HashSet};

    /// Six chats covering three distinct single-participant sets.
    ///
    /// Every call builds a fresh `HashMap`, so each caller gets its own instance.
    fn single_participant_chats() -> HashMap<i32, BTreeSet<i32>> {
        HashMap::from([
            (1, BTreeSet::from([1])), // 0
            (2, BTreeSet::from([1])), // 0
            (3, BTreeSet::from([1])), // 0
            (4, BTreeSet::from([2])), // 1
            (5, BTreeSet::from([2])), // 1
            (6, BTreeSet::from([3])), // 2
        ])
    }

    /// Count how many distinct deduplicated IDs appear in the output
    fn distinct_ids(deduped: &HashMap<i32, i32>) -> usize {
        deduped.values().copied().collect::<HashSet<i32>>().len()
    }

    #[test]
    fn can_dedupe() {
        let output = ChatToHandle::dedupe(&single_participant_chats());

        assert_eq!(distinct_ids(&output), 3);
        // Pin the exact assignment, not only how many IDs were produced
        let expected = HashMap::from([(1, 0), (2, 0), (3, 0), (4, 1), (5, 1), (6, 2)]);
        assert_eq!(output, expected);
    }

    #[test]
    fn can_dedupe_multi() {
        let input: HashMap<i32, BTreeSet<i32>> = HashMap::from([
            (1, BTreeSet::from([1, 2])), // 0
            (2, BTreeSet::from([1])),    // 1
            (3, BTreeSet::from([1])),    // 1
            (4, BTreeSet::from([2, 1])), // 0
            (5, BTreeSet::from([2, 3])), // 2
            (6, BTreeSet::from([3])),    // 3
        ]);

        let output = ChatToHandle::dedupe(&input);

        assert_eq!(distinct_ids(&output), 4);
        // Participant order inside a set must not matter: chats 1 and 4 share an ID
        let expected = HashMap::from([(1, 0), (2, 1), (3, 1), (4, 0), (5, 2), (6, 3)]);
        assert_eq!(output, expected);
    }

    #[test]
    fn can_dedupe_empty() {
        let input: HashMap<i32, BTreeSet<i32>> = HashMap::new();
        assert!(ChatToHandle::dedupe(&input).is_empty());
    }

    #[test]
    // Simulate 3 runs of the program and ensure that the order of the deduplicated contacts is stable
    fn test_same_values() {
        let output_1 = ChatToHandle::dedupe(&single_participant_chats());
        let output_2 = ChatToHandle::dedupe(&single_participant_chats());
        let output_3 = ChatToHandle::dedupe(&single_participant_chats());

        assert_eq!(output_1, output_2);
        assert_eq!(output_1, output_3);
        assert_eq!(output_2, output_3);
    }
}