Skip to main content

mxr_sync/
threading.rs

1//! JWZ threading algorithm — reconstruct threads from In-Reply-To + References headers.
2//! See https://www.jwz.org/doc/threading.html
3
4use chrono::{DateTime, Utc};
5use std::collections::HashMap;
6
7#[derive(Debug, Clone)]
8pub struct MessageForThreading {
9    pub message_id: String,
10    pub in_reply_to: Option<String>,
11    pub references: Vec<String>,
12    pub date: DateTime<Utc>,
13    pub subject: String,
14}
15
16#[derive(Debug, Clone)]
17struct Container {
18    message: Option<MessageForThreading>,
19    parent: Option<String>,
20    children: Vec<String>,
21}
22
23impl Container {
24    fn empty() -> Self {
25        Self {
26            message: None,
27            parent: None,
28            children: Vec::new(),
29        }
30    }
31}
32
33#[derive(Debug, Clone)]
34pub struct ThreadTree {
35    pub root_message_id: String,
36    pub messages: Vec<String>,
37}
38
39/// JWZ threading: reconstruct threads from In-Reply-To + References headers.
40pub fn thread_messages(messages: &[MessageForThreading]) -> Vec<ThreadTree> {
41    if messages.is_empty() {
42        return Vec::new();
43    }
44
45    let message_map: HashMap<&str, &MessageForThreading> = messages
46        .iter()
47        .map(|message| (message.message_id.as_str(), message))
48        .collect();
49    let mut id_table: HashMap<String, Container> = HashMap::new();
50
51    // Step 1: Build ID table
52    for msg in messages {
53        let container = id_table
54            .entry(msg.message_id.clone())
55            .or_insert_with(Container::empty);
56        container.message = Some(msg.clone());
57
58        // Walk References header, link each pair as parent→child
59        let mut prev_id: Option<&str> = None;
60        for ref_id in &msg.references {
61            id_table
62                .entry(ref_id.clone())
63                .or_insert_with(Container::empty);
64            if let Some(parent_id) = prev_id {
65                // Set parent_id as parent of ref_id (if not already parented and no cycle)
66                let ref_container = id_table.get(ref_id).unwrap();
67                if ref_container.parent.is_none()
68                    && !would_create_cycle(&id_table, parent_id, ref_id)
69                {
70                    // Clone to avoid borrow issues
71                    let parent_id_owned = parent_id.to_string();
72                    let ref_id_owned = ref_id.clone();
73                    if let Some(c) = id_table.get_mut(&ref_id_owned) {
74                        c.parent = Some(parent_id_owned.clone());
75                    }
76                    if let Some(p) = id_table.get_mut(&parent_id_owned) {
77                        if !p.children.contains(&ref_id_owned) {
78                            p.children.push(ref_id_owned);
79                        }
80                    }
81                }
82            }
83            prev_id = Some(ref_id);
84        }
85
86        // Set last reference (or In-Reply-To) as parent of this message
87        let parent = msg.in_reply_to.as_deref().or(prev_id);
88        if let Some(parent_id) = parent {
89            if parent_id != msg.message_id
90                && !would_create_cycle(&id_table, parent_id, &msg.message_id)
91            {
92                let parent_id_owned = parent_id.to_string();
93                let msg_id = msg.message_id.clone();
94                if let Some(c) = id_table.get_mut(&msg_id) {
95                    c.parent = Some(parent_id_owned.clone());
96                }
97                if let Some(p) = id_table.get_mut(&parent_id_owned) {
98                    if !p.children.contains(&msg_id) {
99                        p.children.push(msg_id);
100                    }
101                }
102            }
103        }
104    }
105
106    // Step 2: Find root set
107    // A root is either:
108    // - A container with no parent that has a message
109    // - A container whose parent is a phantom (no message) with no parent itself
110    let mut roots: Vec<String> = Vec::new();
111    for (id, container) in &id_table {
112        if container.message.is_none() {
113            continue;
114        }
115        match &container.parent {
116            None => roots.push(id.clone()),
117            Some(parent_id) => {
118                // If parent is phantom (no message) and has no parent, promote this as root
119                if let Some(parent) = id_table.get(parent_id) {
120                    if parent.message.is_none() && parent.parent.is_none() {
121                        roots.push(id.clone());
122                    }
123                }
124            }
125        }
126    }
127
128    // Sort roots by date (earliest first)
129    roots.sort_by(|a, b| {
130        let date_a = id_table
131            .get(a)
132            .and_then(|c| c.message.as_ref())
133            .map(|m| m.date)
134            .unwrap_or_default();
135        let date_b = id_table
136            .get(b)
137            .and_then(|c| c.message.as_ref())
138            .map(|m| m.date)
139            .unwrap_or_default();
140        date_a.cmp(&date_b)
141    });
142
143    // Step 3: Build thread trees
144    let mut threads = Vec::new();
145    for root_id in &roots {
146        let mut thread_messages = Vec::new();
147        collect_thread_messages(&id_table, root_id, &mut thread_messages);
148        if !thread_messages.is_empty() {
149            threads.push(ThreadTree {
150                root_message_id: root_id.clone(),
151                messages: thread_messages,
152            });
153        }
154    }
155
156    merge_subject_fallback_threads(threads, &message_map)
157}
158
159fn collect_thread_messages(table: &HashMap<String, Container>, id: &str, out: &mut Vec<String>) {
160    if let Some(container) = table.get(id) {
161        if container.message.is_some() {
162            out.push(id.to_string());
163        }
164        for child_id in &container.children {
165            collect_thread_messages(table, child_id, out);
166        }
167    }
168}
169
170fn would_create_cycle(table: &HashMap<String, Container>, parent_id: &str, child_id: &str) -> bool {
171    // Walk up from parent_id; if we reach child_id, it's a cycle
172    let mut current = Some(parent_id);
173    while let Some(id) = current {
174        if id == child_id {
175            return true;
176        }
177        current = table.get(id).and_then(|c| c.parent.as_deref());
178    }
179    false
180}
181
182fn merge_subject_fallback_threads(
183    threads: Vec<ThreadTree>,
184    message_map: &HashMap<&str, &MessageForThreading>,
185) -> Vec<ThreadTree> {
186    let mut merged: Vec<ThreadTree> = Vec::new();
187    let mut subject_roots: HashMap<String, usize> = HashMap::new();
188
189    for thread in threads {
190        let Some(first_id) = thread.messages.first() else {
191            continue;
192        };
193        let key = message_map
194            .get(first_id.as_str())
195            .map(|message| normalize_subject(&message.subject))
196            .unwrap_or_default();
197        let has_headers = thread.messages.iter().any(|id| {
198            message_map.get(id.as_str()).is_some_and(|message| {
199                message.in_reply_to.is_some() || !message.references.is_empty()
200            })
201        });
202
203        if !has_headers && !key.is_empty() {
204            if let Some(index) = subject_roots.get(&key).copied() {
205                let target = &mut merged[index];
206                for message_id in &thread.messages {
207                    if !target.messages.contains(message_id) {
208                        target.messages.push(message_id.clone());
209                    }
210                }
211                sort_thread_messages(target, message_map);
212                continue;
213            }
214        }
215
216        let mut thread = thread;
217        sort_thread_messages(&mut thread, message_map);
218        if !key.is_empty() {
219            subject_roots.entry(key).or_insert_with(|| merged.len());
220        }
221        merged.push(thread);
222    }
223
224    merged
225}
226
227fn sort_thread_messages(thread: &mut ThreadTree, message_map: &HashMap<&str, &MessageForThreading>) {
228    thread.messages.sort_by(|left, right| {
229        let left_date = message_map
230            .get(left.as_str())
231            .map(|message| message.date)
232            .unwrap_or_default();
233        let right_date = message_map
234            .get(right.as_str())
235            .map(|message| message.date)
236            .unwrap_or_default();
237        left_date.cmp(&right_date)
238    });
239    if let Some(first) = thread.messages.first() {
240        thread.root_message_id = first.clone();
241    }
242}
243
244fn normalize_subject(subject: &str) -> String {
245    let mut normalized = subject.trim();
246
247    loop {
248        let Some(prefix_end) = normalized.find(':') else {
249            break;
250        };
251        let prefix = normalized[..prefix_end].trim();
252        let lower = prefix.to_ascii_lowercase();
253        let base = lower
254            .trim_end_matches(|ch: char| ch.is_ascii_digit() || matches!(ch, '[' | ']' | '(' | ')' | ' '))
255            .trim();
256
257        if matches!(
258            base,
259            "re" | "fw" | "fwd" | "aw" | "sv" | "antw" | "rv" | "odp" | "tr" | "wg"
260        ) {
261            normalized = normalized[prefix_end + 1..].trim();
262            continue;
263        }
264
265        break;
266    }
267
268    normalized.to_ascii_lowercase()
269}
270
271#[cfg(test)]
272mod tests {
273    use super::*;
274
275    fn msg(id: &str, reply_to: Option<&str>, refs: &[&str], subject: &str) -> MessageForThreading {
276        MessageForThreading {
277            message_id: id.to_string(),
278            in_reply_to: reply_to.map(|s| s.to_string()),
279            references: refs.iter().map(|s| s.to_string()).collect(),
280            date: Utc::now(),
281            subject: subject.to_string(),
282        }
283    }
284
285    #[test]
286    fn jwz_threading_basic() {
287        let messages = vec![
288            msg("msg1@ex", None, &[], "Hello"),
289            msg("msg2@ex", Some("msg1@ex"), &["msg1@ex"], "Re: Hello"),
290            msg(
291                "msg3@ex",
292                Some("msg2@ex"),
293                &["msg1@ex", "msg2@ex"],
294                "Re: Re: Hello",
295            ),
296        ];
297
298        let threads = thread_messages(&messages);
299        assert_eq!(threads.len(), 1, "Should form one thread");
300        assert_eq!(
301            threads[0].messages.len(),
302            3,
303            "Thread should have 3 messages"
304        );
305        assert_eq!(threads[0].root_message_id, "msg1@ex");
306    }
307
308    #[test]
309    fn jwz_threading_two_independent_threads() {
310        let messages = vec![
311            msg("a1@ex", None, &[], "Topic A"),
312            msg("a2@ex", Some("a1@ex"), &["a1@ex"], "Re: Topic A"),
313            msg("b1@ex", None, &[], "Topic B"),
314            msg("b2@ex", Some("b1@ex"), &["b1@ex"], "Re: Topic B"),
315        ];
316
317        let threads = thread_messages(&messages);
318        assert_eq!(threads.len(), 2, "Should form two threads");
319    }
320
321    #[test]
322    fn jwz_threading_missing_references() {
323        // msg2 replies to msg1, but msg1 is not in our set
324        let messages = vec![
325            msg("msg2@ex", Some("msg1@ex"), &["msg1@ex"], "Re: Hello"),
326            msg(
327                "msg3@ex",
328                Some("msg2@ex"),
329                &["msg1@ex", "msg2@ex"],
330                "Re: Re: Hello",
331            ),
332        ];
333
334        let threads = thread_messages(&messages);
335        // msg2 should be the root (since msg1 is a phantom container without a message)
336        assert_eq!(threads.len(), 1);
337        assert_eq!(threads[0].messages.len(), 2);
338    }
339
340    #[test]
341    fn jwz_threading_no_replies() {
342        let messages = vec![
343            msg("msg1@ex", None, &[], "Hello"),
344            msg("msg2@ex", None, &[], "World"),
345        ];
346
347        let threads = thread_messages(&messages);
348        assert_eq!(threads.len(), 2, "Each message is its own thread");
349    }
350
351    #[test]
352    fn jwz_threading_empty_input() {
353        let threads = thread_messages(&[]);
354        assert!(threads.is_empty());
355    }
356
357    #[test]
358    fn subject_fallback_groups_headerless_replies() {
359        let messages = vec![
360            msg("msg1@ex", None, &[], "Hello"),
361            msg("msg2@ex", None, &[], "Re: Hello"),
362            msg("msg3@ex", None, &[], "AW: Hello"),
363        ];
364
365        let threads = thread_messages(&messages);
366        assert_eq!(threads.len(), 1);
367        assert_eq!(threads[0].messages.len(), 3);
368    }
369
370    #[test]
371    fn subject_fallback_attaches_headerless_reply_to_header_thread() {
372        let messages = vec![
373            msg("msg1@ex", None, &[], "Topic"),
374            msg("msg2@ex", Some("msg1@ex"), &["msg1@ex"], "Re: Topic"),
375            msg("msg3@ex", None, &[], "SV: Topic"),
376        ];
377
378        let threads = thread_messages(&messages);
379        assert_eq!(threads.len(), 1);
380        assert_eq!(threads[0].messages.len(), 3);
381    }
382
383    #[test]
384    fn subject_fallback_does_not_merge_independent_header_threads() {
385        let messages = vec![
386            msg("root-a@ex", None, &[], "Topic"),
387            msg("reply-a@ex", Some("root-a@ex"), &["root-a@ex"], "Re: Topic"),
388            msg("root-b@ex", None, &[], "Topic"),
389            msg("reply-b@ex", Some("root-b@ex"), &["root-b@ex"], "Re: Topic"),
390        ];
391
392        let threads = thread_messages(&messages);
393        assert_eq!(threads.len(), 2);
394    }
395}