1use std::collections::{BTreeSet, HashMap, HashSet};
6
7use crate::{
8 error::table::TableError,
9 tables::table::{CHAT_HANDLE_JOIN, CHAT_MESSAGE_JOIN, Cacheable, Diagnostic, Table},
10 util::output::{done_processing, processing},
11};
12use rusqlite::{CachedStatement, Connection, Error, Result, Row};
13
/// A single chat/handle pairing, built from the `chat_id` and `handle_id`
/// columns of a row in the chat-to-handle join table.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ChatToHandle {
    /// Identifier of the chat this pairing belongs to.
    chat_id: i32,
    /// Identifier of the handle that takes part in the chat.
    handle_id: i32,
}
20
21impl Table for ChatToHandle {
22 fn from_row(row: &Row) -> Result<ChatToHandle> {
23 Ok(ChatToHandle {
24 chat_id: row.get("chat_id")?,
25 handle_id: row.get("handle_id")?,
26 })
27 }
28
29 fn get(db: &'_ Connection) -> Result<CachedStatement<'_>, TableError> {
30 Ok(db.prepare_cached(&format!("SELECT * FROM {CHAT_HANDLE_JOIN}"))?)
31 }
32
33 fn extract(chat_to_handle: Result<Result<Self, Error>, Error>) -> Result<Self, TableError> {
34 match chat_to_handle {
35 Ok(Ok(chat_to_handle)) => Ok(chat_to_handle),
36 Err(why) | Ok(Err(why)) => Err(TableError::QueryError(why)),
37 }
38 }
39}
40
41impl Cacheable for ChatToHandle {
43 type K = i32;
44 type V = BTreeSet<i32>;
45 fn cache(db: &Connection) -> Result<HashMap<Self::K, Self::V>, TableError> {
59 let mut cache: HashMap<i32, BTreeSet<i32>> = HashMap::new();
60
61 let mut rows = ChatToHandle::get(db)?;
62 let mappings = rows.query_map([], |row| Ok(ChatToHandle::from_row(row)))?;
63
64 for mapping in mappings {
65 let joiner = ChatToHandle::extract(mapping)?;
66 if let Some(handles) = cache.get_mut(&joiner.chat_id) {
67 handles.insert(joiner.handle_id);
68 } else {
69 let mut data_to_cache = BTreeSet::new();
70 data_to_cache.insert(joiner.handle_id);
71 cache.insert(joiner.chat_id, data_to_cache);
72 }
73 }
74
75 Ok(cache)
76 }
77}
78
79impl Diagnostic for ChatToHandle {
81 fn run_diagnostic(db: &Connection) -> Result<(), TableError> {
98 processing();
99
100 let mut statement_message_chats =
102 db.prepare(&format!("SELECT DISTINCT chat_id from {CHAT_MESSAGE_JOIN}"))?;
103 let statement_message_chat_rows =
104 statement_message_chats.query_map([], |row: &Row| -> Result<i32> { row.get(0) })?;
105 let mut unique_chats_from_messages: HashSet<i32> = HashSet::new();
106 statement_message_chat_rows.into_iter().for_each(|row| {
107 if let Ok(row) = row {
108 unique_chats_from_messages.insert(row);
109 }
110 });
111
112 let mut statement_handle_chats =
114 db.prepare(&format!("SELECT DISTINCT chat_id from {CHAT_HANDLE_JOIN}"))?;
115 let statement_handle_chat_rows =
116 statement_handle_chats.query_map([], |row: &Row| -> Result<i32> { row.get(0) })?;
117 let mut unique_chats_from_handles: HashSet<i32> = HashSet::new();
118 statement_handle_chat_rows.into_iter().for_each(|row| {
119 if let Ok(row) = row {
120 unique_chats_from_handles.insert(row);
121 }
122 });
123
124 let all_chats = Self::cache(db)?;
126
127 let chatroom_participants = ChatToHandle::cache(db)?;
129 let chat_handle_lookup = ChatToHandle::get_chat_lookup_map(db)?;
130
131 let real_chatrooms = ChatToHandle::dedupe(&chatroom_participants, &chat_handle_lookup)?;
133
134 let total_dupes =
136 all_chats.len() - HashSet::<&i32>::from_iter(real_chatrooms.values()).len();
137
138 done_processing();
139
140 let chats_with_no_handles = unique_chats_from_messages
142 .difference(&unique_chats_from_handles)
143 .count();
144
145 println!("Thread diagnostic data:");
146 println!(" Total chats: {}", all_chats.len());
147
148 if total_dupes > 0 {
149 println!(" Total duplicated chats: {total_dupes}");
150 }
151
152 if chats_with_no_handles > 0 {
153 println!(" Chats with no handles: {chats_with_no_handles:?}");
154 }
155 Ok(())
156 }
157}
158
159impl ChatToHandle {
160 pub fn get_chat_lookup_map(conn: &Connection) -> Result<HashMap<i32, i32>, TableError> {
177 let mut stmt = conn.prepare(
179 "
180WITH RECURSIVE
181 adj AS (
182 SELECT DISTINCT a.chat AS u, b.chat AS v
183 FROM chat_lookup a
184 JOIN chat_lookup b
185 ON a.identifier = b.identifier
186 ),
187 reach(root, chat) AS (
188 SELECT u AS root, v AS chat FROM adj
189 UNION
190 SELECT r.root, a.v
191 FROM reach r
192 JOIN adj a ON a.u = r.chat
193 ),
194 canon AS (
195 SELECT chat, MAX(root) AS canonical_chat
196 FROM reach
197 GROUP BY chat
198 )
199SELECT chat, canonical_chat
200FROM canon
201ORDER BY chat;
202 ",
203 );
204 let mut chat_lookup_map: HashMap<i32, i32> = HashMap::new();
205
206 if let Ok(statement) = stmt.as_mut() {
207 let chat_lookup_rows = statement.query_map([], |row| {
209 let chat: i32 = row.get(0)?;
210 let canonical: i32 = row.get(1)?;
211 Ok((chat, canonical))
212 });
213
214 if let Ok(chat_lookup_rows) = chat_lookup_rows {
216 for row in chat_lookup_rows {
217 let (chat_id, canonical_chat) = row?;
218 chat_lookup_map.insert(chat_id, canonical_chat);
219 }
220 }
221 }
222 Ok(chat_lookup_map)
223 }
224
225 pub fn dedupe(
247 duplicated_data: &HashMap<i32, BTreeSet<i32>>,
248 chat_lookup_map: &HashMap<i32, i32>,
249 ) -> Result<HashMap<i32, i32>, TableError> {
250 let mut deduplicated_chats: HashMap<i32, i32> = HashMap::new();
251 let mut participants_to_unique_chat_id: HashMap<BTreeSet<i32>, i32> = HashMap::new();
252
253 let mut unique_chat_identifier = 0;
255
256 let mut sorted_dupes: Vec<(&i32, &BTreeSet<i32>)> = duplicated_data.iter().collect();
258 sorted_dupes.sort_by(|(a, _), (b, _)| a.cmp(b));
259
260 for (chat_id, participants) in sorted_dupes {
262 if let Some(id) = participants_to_unique_chat_id.get(participants) {
264 deduplicated_chats.insert(chat_id.to_owned(), id.to_owned());
265 } else {
266 let mapped_id = if let Some(canonical_chat) = chat_lookup_map.get(chat_id) {
268 canonical_chat
269 } else {
270 chat_id
271 };
272
273 if let Some(id) = deduplicated_chats.get(mapped_id) {
275 deduplicated_chats.insert(*chat_id, id.to_owned());
277 } else {
278 participants_to_unique_chat_id
280 .insert(participants.to_owned(), unique_chat_identifier);
281
282 deduplicated_chats.insert(chat_id.to_owned(), unique_chat_identifier);
284 unique_chat_identifier += 1;
285 }
286 }
287 }
288 Ok(deduplicated_chats)
289 }
290}
291
#[cfg(test)]
mod tests {
    use crate::tables::chat_handle::ChatToHandle;
    use std::collections::{BTreeSet, HashMap, HashSet};

    /// Chats with identical single-participant sets collapse to one identifier.
    #[test]
    fn can_dedupe() {
        let input: HashMap<i32, BTreeSet<i32>> = HashMap::from([
            (1, BTreeSet::from([1])),
            (2, BTreeSet::from([1])),
            (3, BTreeSet::from([1])),
            (4, BTreeSet::from([2])),
            (5, BTreeSet::from([2])),
            (6, BTreeSet::from([3])),
        ]);

        let output = ChatToHandle::dedupe(&input, &HashMap::new());
        let distinct_ids: HashSet<i32> = output.unwrap().values().copied().collect();

        // Participant sets: {1}, {2}, {3}.
        assert_eq!(distinct_ids.len(), 3);
    }

    /// Multi-participant sets are compared as sets, regardless of listing order.
    #[test]
    fn can_dedupe_multi() {
        let input: HashMap<i32, BTreeSet<i32>> = HashMap::from([
            (1, BTreeSet::from([1, 2])),
            (2, BTreeSet::from([1])),
            (3, BTreeSet::from([1])),
            (4, BTreeSet::from([2, 1])),
            (5, BTreeSet::from([2, 3])),
            (6, BTreeSet::from([3])),
        ]);

        let output = ChatToHandle::dedupe(&input, &HashMap::new());
        let distinct_ids: HashSet<i32> = output.unwrap().values().copied().collect();

        // Participant sets: {1, 2}, {1}, {2, 3}, {3}.
        assert_eq!(distinct_ids.len(), 4);
    }

    /// Equal inputs held in separately built maps produce equal outputs.
    #[test]
    fn test_same_values() {
        // Each call builds its own map, then returns the sorted dedupe result.
        let sorted_output = || {
            let input: HashMap<i32, BTreeSet<i32>> = HashMap::from([
                (1, BTreeSet::from([1])),
                (2, BTreeSet::from([1])),
                (3, BTreeSet::from([1])),
                (4, BTreeSet::from([2])),
                (5, BTreeSet::from([2])),
                (6, BTreeSet::from([3])),
            ]);
            let mut pairs = ChatToHandle::dedupe(&input, &HashMap::new())
                .unwrap()
                .into_iter()
                .collect::<Vec<(i32, i32)>>();
            pairs.sort_unstable();
            pairs
        };

        let output_1 = sorted_output();
        let output_2 = sorted_output();
        let output_3 = sorted_output();

        assert_eq!(output_1, output_2);
        assert_eq!(output_1, output_3);
        assert_eq!(output_2, output_3);
    }

    /// A lookup entry merges a chat into its canonical chat when that
    /// canonical chat has already been assigned an identifier.
    #[test]
    fn can_dedupe_with_chat_lookup_map() {
        let input: HashMap<i32, BTreeSet<i32>> = HashMap::from([
            (0, BTreeSet::from([1])),
            (1, BTreeSet::from([1])),
            (2, BTreeSet::from([3])),
            (4, BTreeSet::from([2])),
            (5, BTreeSet::from([1])),
        ]);
        let chat_lookup_map: HashMap<i32, i32> = HashMap::from([(2, 5), (4, 0)]);

        let output = ChatToHandle::dedupe(&input, &chat_lookup_map).unwrap();

        // Chats 0 and 1 share the same participants.
        assert_eq!(output.get(&0), output.get(&1));
        // Chat 4 is linked to chat 0 through the lookup map.
        assert_eq!(output.get(&0), output.get(&4));
        // Chat 2 stays separate from chat 1.
        assert_ne!(output.get(&2), output.get(&1));
    }

    /// A lookup entry merges chats even when their participants differ.
    #[test]
    fn can_dedupe_with_lookup_map_overriding_participants() {
        let input: HashMap<i32, BTreeSet<i32>> = HashMap::from([
            (0, BTreeSet::from([1, 2])),
            (1, BTreeSet::from([1, 2])),
            (2, BTreeSet::from([3, 4])),
            (3, BTreeSet::from([1, 2])),
        ]);
        let chat_lookup_map: HashMap<i32, i32> = HashMap::from([(1, 0), (2, 0)]);

        let output = ChatToHandle::dedupe(&input, &chat_lookup_map).unwrap();

        // Same participants as chat 0.
        assert_eq!(output.get(&0), output.get(&1));
        // Different participants, but linked to chat 0 through the lookup map.
        assert_eq!(output.get(&0), output.get(&2));
        // No lookup entry; matched purely by participants.
        assert_eq!(output.get(&3), output.get(&0));
    }

    /// Lookup-based and participant-based matching work side by side.
    #[test]
    fn can_dedupe_mixed_lookup_and_participants() {
        let input: HashMap<i32, BTreeSet<i32>> = HashMap::from([
            (0, BTreeSet::from([1])),
            (1, BTreeSet::from([1])),
            (2, BTreeSet::from([3])),
            (3, BTreeSet::from([2])),
            (4, BTreeSet::from([3])),
        ]);
        let chat_lookup_map: HashMap<i32, i32> = HashMap::from([(1, 0), (3, 0)]);

        let output = ChatToHandle::dedupe(&input, &chat_lookup_map).unwrap();

        // Chats 0 and 1 share participants (and a lookup link).
        assert_eq!(output.get(&0), output.get(&1));
        // Chat 3 has different participants but is linked to chat 0.
        assert_eq!(output.get(&0), output.get(&3));
        // Chat 2 is unrelated to the group around chat 0.
        assert_ne!(output.get(&2), output.get(&1));
        // Chats 2 and 4 share participants.
        assert_eq!(output.get(&4), output.get(&2));
        // The two groups remain distinct.
        assert_ne!(output.get(&3), output.get(&4));
    }
}