1use chrono::{DateTime, Utc};
5use std::collections::HashMap;
6
7#[derive(Debug, Clone)]
8pub struct MessageForThreading {
9 pub message_id: String,
10 pub in_reply_to: Option<String>,
11 pub references: Vec<String>,
12 pub date: DateTime<Utc>,
13 pub subject: String,
14}
15
16#[derive(Debug, Clone)]
17struct Container {
18 message: Option<MessageForThreading>,
19 parent: Option<String>,
20 children: Vec<String>,
21}
22
23impl Container {
24 fn empty() -> Self {
25 Self {
26 message: None,
27 parent: None,
28 children: Vec::new(),
29 }
30 }
31}
32
33#[derive(Debug, Clone)]
34pub struct ThreadTree {
35 pub root_message_id: String,
36 pub messages: Vec<String>,
37}
38
39pub fn thread_messages(messages: &[MessageForThreading]) -> Vec<ThreadTree> {
41 if messages.is_empty() {
42 return Vec::new();
43 }
44
45 let message_map: HashMap<&str, &MessageForThreading> = messages
46 .iter()
47 .map(|message| (message.message_id.as_str(), message))
48 .collect();
49 let mut id_table: HashMap<String, Container> = HashMap::new();
50
51 for msg in messages {
53 let container = id_table
54 .entry(msg.message_id.clone())
55 .or_insert_with(Container::empty);
56 container.message = Some(msg.clone());
57
58 let mut prev_id: Option<&str> = None;
60 for ref_id in &msg.references {
61 id_table
62 .entry(ref_id.clone())
63 .or_insert_with(Container::empty);
64 if let Some(parent_id) = prev_id {
65 let ref_container = id_table.get(ref_id).unwrap();
67 if ref_container.parent.is_none()
68 && !would_create_cycle(&id_table, parent_id, ref_id)
69 {
70 let parent_id_owned = parent_id.to_string();
72 let ref_id_owned = ref_id.clone();
73 if let Some(c) = id_table.get_mut(&ref_id_owned) {
74 c.parent = Some(parent_id_owned.clone());
75 }
76 if let Some(p) = id_table.get_mut(&parent_id_owned) {
77 if !p.children.contains(&ref_id_owned) {
78 p.children.push(ref_id_owned);
79 }
80 }
81 }
82 }
83 prev_id = Some(ref_id);
84 }
85
86 let parent = msg.in_reply_to.as_deref().or(prev_id);
88 if let Some(parent_id) = parent {
89 if parent_id != msg.message_id
90 && !would_create_cycle(&id_table, parent_id, &msg.message_id)
91 {
92 let parent_id_owned = parent_id.to_string();
93 let msg_id = msg.message_id.clone();
94 if let Some(c) = id_table.get_mut(&msg_id) {
95 c.parent = Some(parent_id_owned.clone());
96 }
97 if let Some(p) = id_table.get_mut(&parent_id_owned) {
98 if !p.children.contains(&msg_id) {
99 p.children.push(msg_id);
100 }
101 }
102 }
103 }
104 }
105
106 let mut roots: Vec<String> = Vec::new();
111 for (id, container) in &id_table {
112 if container.message.is_none() {
113 continue;
114 }
115 match &container.parent {
116 None => roots.push(id.clone()),
117 Some(parent_id) => {
118 if let Some(parent) = id_table.get(parent_id) {
120 if parent.message.is_none() && parent.parent.is_none() {
121 roots.push(id.clone());
122 }
123 }
124 }
125 }
126 }
127
128 roots.sort_by(|a, b| {
130 let date_a = id_table
131 .get(a)
132 .and_then(|c| c.message.as_ref())
133 .map(|m| m.date)
134 .unwrap_or_default();
135 let date_b = id_table
136 .get(b)
137 .and_then(|c| c.message.as_ref())
138 .map(|m| m.date)
139 .unwrap_or_default();
140 date_a.cmp(&date_b)
141 });
142
143 let mut threads = Vec::new();
145 for root_id in &roots {
146 let mut thread_messages = Vec::new();
147 collect_thread_messages(&id_table, root_id, &mut thread_messages);
148 if !thread_messages.is_empty() {
149 threads.push(ThreadTree {
150 root_message_id: root_id.clone(),
151 messages: thread_messages,
152 });
153 }
154 }
155
156 merge_subject_fallback_threads(threads, &message_map)
157}
158
159fn collect_thread_messages(table: &HashMap<String, Container>, id: &str, out: &mut Vec<String>) {
160 if let Some(container) = table.get(id) {
161 if container.message.is_some() {
162 out.push(id.to_string());
163 }
164 for child_id in &container.children {
165 collect_thread_messages(table, child_id, out);
166 }
167 }
168}
169
170fn would_create_cycle(table: &HashMap<String, Container>, parent_id: &str, child_id: &str) -> bool {
171 let mut current = Some(parent_id);
173 while let Some(id) = current {
174 if id == child_id {
175 return true;
176 }
177 current = table.get(id).and_then(|c| c.parent.as_deref());
178 }
179 false
180}
181
182fn merge_subject_fallback_threads(
183 threads: Vec<ThreadTree>,
184 message_map: &HashMap<&str, &MessageForThreading>,
185) -> Vec<ThreadTree> {
186 let mut merged: Vec<ThreadTree> = Vec::new();
187 let mut subject_roots: HashMap<String, usize> = HashMap::new();
188
189 for thread in threads {
190 let Some(first_id) = thread.messages.first() else {
191 continue;
192 };
193 let key = message_map
194 .get(first_id.as_str())
195 .map(|message| normalize_subject(&message.subject))
196 .unwrap_or_default();
197 let has_headers = thread.messages.iter().any(|id| {
198 message_map.get(id.as_str()).is_some_and(|message| {
199 message.in_reply_to.is_some() || !message.references.is_empty()
200 })
201 });
202
203 if !has_headers && !key.is_empty() {
204 if let Some(index) = subject_roots.get(&key).copied() {
205 let target = &mut merged[index];
206 for message_id in &thread.messages {
207 if !target.messages.contains(message_id) {
208 target.messages.push(message_id.clone());
209 }
210 }
211 sort_thread_messages(target, message_map);
212 continue;
213 }
214 }
215
216 let mut thread = thread;
217 sort_thread_messages(&mut thread, message_map);
218 if !key.is_empty() {
219 subject_roots.entry(key).or_insert_with(|| merged.len());
220 }
221 merged.push(thread);
222 }
223
224 merged
225}
226
227fn sort_thread_messages(
228 thread: &mut ThreadTree,
229 message_map: &HashMap<&str, &MessageForThreading>,
230) {
231 thread.messages.sort_by(|left, right| {
232 let left_date = message_map
233 .get(left.as_str())
234 .map(|message| message.date)
235 .unwrap_or_default();
236 let right_date = message_map
237 .get(right.as_str())
238 .map(|message| message.date)
239 .unwrap_or_default();
240 left_date.cmp(&right_date)
241 });
242 if let Some(first) = thread.messages.first() {
243 thread.root_message_id = first.clone();
244 }
245}
246
247fn normalize_subject(subject: &str) -> String {
248 let mut normalized = subject.trim();
249
250 loop {
251 let Some(prefix_end) = normalized.find(':') else {
252 break;
253 };
254 let prefix = normalized[..prefix_end].trim();
255 let lower = prefix.to_ascii_lowercase();
256 let base = lower
257 .trim_end_matches(|ch: char| {
258 ch.is_ascii_digit() || matches!(ch, '[' | ']' | '(' | ')' | ' ')
259 })
260 .trim();
261
262 if matches!(
263 base,
264 "re" | "fw" | "fwd" | "aw" | "sv" | "antw" | "rv" | "odp" | "tr" | "wg"
265 ) {
266 normalized = normalized[prefix_end + 1..].trim();
267 continue;
268 }
269
270 break;
271 }
272
273 normalized.to_ascii_lowercase()
274}
275
276#[cfg(test)]
277mod tests {
278 use super::*;
279
280 fn msg(id: &str, reply_to: Option<&str>, refs: &[&str], subject: &str) -> MessageForThreading {
281 MessageForThreading {
282 message_id: id.to_string(),
283 in_reply_to: reply_to.map(|s| s.to_string()),
284 references: refs.iter().map(|s| s.to_string()).collect(),
285 date: Utc::now(),
286 subject: subject.to_string(),
287 }
288 }
289
290 #[test]
291 fn jwz_threading_basic() {
292 let messages = vec![
293 msg("msg1@ex", None, &[], "Hello"),
294 msg("msg2@ex", Some("msg1@ex"), &["msg1@ex"], "Re: Hello"),
295 msg(
296 "msg3@ex",
297 Some("msg2@ex"),
298 &["msg1@ex", "msg2@ex"],
299 "Re: Re: Hello",
300 ),
301 ];
302
303 let threads = thread_messages(&messages);
304 assert_eq!(threads.len(), 1, "Should form one thread");
305 assert_eq!(
306 threads[0].messages.len(),
307 3,
308 "Thread should have 3 messages"
309 );
310 assert_eq!(threads[0].root_message_id, "msg1@ex");
311 }
312
313 #[test]
314 fn jwz_threading_two_independent_threads() {
315 let messages = vec![
316 msg("a1@ex", None, &[], "Topic A"),
317 msg("a2@ex", Some("a1@ex"), &["a1@ex"], "Re: Topic A"),
318 msg("b1@ex", None, &[], "Topic B"),
319 msg("b2@ex", Some("b1@ex"), &["b1@ex"], "Re: Topic B"),
320 ];
321
322 let threads = thread_messages(&messages);
323 assert_eq!(threads.len(), 2, "Should form two threads");
324 }
325
326 #[test]
327 fn jwz_threading_missing_references() {
328 let messages = vec![
330 msg("msg2@ex", Some("msg1@ex"), &["msg1@ex"], "Re: Hello"),
331 msg(
332 "msg3@ex",
333 Some("msg2@ex"),
334 &["msg1@ex", "msg2@ex"],
335 "Re: Re: Hello",
336 ),
337 ];
338
339 let threads = thread_messages(&messages);
340 assert_eq!(threads.len(), 1);
342 assert_eq!(threads[0].messages.len(), 2);
343 }
344
345 #[test]
346 fn jwz_threading_no_replies() {
347 let messages = vec![
348 msg("msg1@ex", None, &[], "Hello"),
349 msg("msg2@ex", None, &[], "World"),
350 ];
351
352 let threads = thread_messages(&messages);
353 assert_eq!(threads.len(), 2, "Each message is its own thread");
354 }
355
356 #[test]
357 fn jwz_threading_empty_input() {
358 let threads = thread_messages(&[]);
359 assert!(threads.is_empty());
360 }
361
362 #[test]
363 fn subject_fallback_groups_headerless_replies() {
364 let messages = vec![
365 msg("msg1@ex", None, &[], "Hello"),
366 msg("msg2@ex", None, &[], "Re: Hello"),
367 msg("msg3@ex", None, &[], "AW: Hello"),
368 ];
369
370 let threads = thread_messages(&messages);
371 assert_eq!(threads.len(), 1);
372 assert_eq!(threads[0].messages.len(), 3);
373 }
374
375 #[test]
376 fn subject_fallback_attaches_headerless_reply_to_header_thread() {
377 let messages = vec![
378 msg("msg1@ex", None, &[], "Topic"),
379 msg("msg2@ex", Some("msg1@ex"), &["msg1@ex"], "Re: Topic"),
380 msg("msg3@ex", None, &[], "SV: Topic"),
381 ];
382
383 let threads = thread_messages(&messages);
384 assert_eq!(threads.len(), 1);
385 assert_eq!(threads[0].messages.len(), 3);
386 }
387
388 #[test]
389 fn subject_fallback_does_not_merge_independent_header_threads() {
390 let messages = vec![
391 msg("root-a@ex", None, &[], "Topic"),
392 msg("reply-a@ex", Some("root-a@ex"), &["root-a@ex"], "Re: Topic"),
393 msg("root-b@ex", None, &[], "Topic"),
394 msg("reply-b@ex", Some("root-b@ex"), &["root-b@ex"], "Re: Topic"),
395 ];
396
397 let threads = thread_messages(&messages);
398 assert_eq!(threads.len(), 2);
399 }
400}