Skip to main content

mailrs_mailbox/
threading.rs

/// reduce a Message-ID to its bare form
///
/// Surrounding whitespace is always trimmed. One enclosing pair of angle
/// brackets is removed, but only when both `<` and `>` are present; a value
/// with a single, unmatched bracket is returned trimmed and otherwise
/// untouched. The result borrows from `id`, so no allocation takes place.
pub fn normalize_message_id(id: &str) -> &str {
    let bare = id.trim();
    if let Some(after_open) = bare.strip_prefix('<') {
        if let Some(inner) = after_open.strip_suffix('>') {
            return inner;
        }
    }
    // not a complete `<...>` pair: keep whatever brackets are there
    bare
}
9
/// determine the thread_id for a message
///
/// - if `in_reply_to` is empty, start a new thread using `own_id`
/// - if `in_reply_to` has a value, ask `lookup` for the parent's thread_id
///   - if found, reuse the parent's thread_id
///   - if not found, use `in_reply_to` as thread_id (orphan reply)
///
/// `lookup` is invoked at most once, and never when `in_reply_to` is empty.
/// Because of that any `FnOnce` closure is accepted, including closures that
/// mutate or consume captured state (for example a `&mut` database handle).
///
/// Both ids are used verbatim: no trimming or bracket stripping happens here,
/// so callers should normalize them first.
pub fn resolve_thread_id<F>(own_id: &str, in_reply_to: &str, lookup: F) -> String
where
    F: FnOnce(&str) -> Option<String>,
{
    if in_reply_to.is_empty() {
        return own_id.to_string();
    }
    // unknown parent: the reply itself names the thread
    lookup(in_reply_to).unwrap_or_else(|| in_reply_to.to_string())
}
28
29/// extract Message-ID header value from raw RFC 5322 bytes
30pub fn extract_message_id(data: &[u8]) -> String {
31    extract_header_raw(data, "message-id")
32}
33
34/// extract In-Reply-To header value from raw RFC 5322 bytes
35pub fn extract_in_reply_to(data: &[u8]) -> String {
36    extract_header_raw(data, "in-reply-to")
37}
38
39fn extract_header_raw(data: &[u8], name: &str) -> String {
40    let text = String::from_utf8_lossy(data);
41    let prefix_len = name.len() + 1; // "name:"
42    for line in text.lines() {
43        if line.len() > prefix_len && line.as_bytes()[name.len()] == b':'
44            && line[..name.len()].eq_ignore_ascii_case(name) {
45                let val = line[prefix_len..].trim();
46                return normalize_message_id(val).to_string();
47        }
48        // empty line = end of headers
49        if line.is_empty() {
50            break;
51        }
52    }
53    String::new()
54}
55
#[cfg(test)]
mod tests {
    use super::*;
    use std::cell::Cell;
    use std::collections::HashMap;

    /// lookup stub for cases where the parent message is unknown
    fn unknown_parent(_id: &str) -> Option<String> {
        None
    }

    #[test]
    fn normalize_strips_angle_brackets() {
        let got = normalize_message_id("<abc@host>");
        assert_eq!(got, "abc@host");
    }

    #[test]
    fn normalize_no_brackets() {
        let got = normalize_message_id("abc@host");
        assert_eq!(got, "abc@host");
    }

    #[test]
    fn normalize_whitespace() {
        let got = normalize_message_id("  <abc@host>  ");
        assert_eq!(got, "abc@host");
    }

    #[test]
    fn resolve_new_thread() {
        // no parent reference: the message's own id names the thread
        let tid = resolve_thread_id("own@host", "", unknown_parent);
        assert_eq!(tid, "own@host");
    }

    #[test]
    fn resolve_existing_parent() {
        let tid = resolve_thread_id("own@host", "parent@host", |queried| {
            assert_eq!(queried, "parent@host");
            Some(String::from("root@host"))
        });
        assert_eq!(tid, "root@host");
    }

    #[test]
    fn resolve_orphan_reply() {
        let tid = resolve_thread_id("own@host", "parent@host", unknown_parent);
        assert_eq!(tid, "parent@host");
    }

    #[test]
    fn extract_message_id_from_bytes() {
        let raw: &[u8] = b"From: a@b.com\r\nMessage-ID: <123@host>\r\nSubject: hi\r\n\r\nbody";
        let got = extract_message_id(raw);
        assert_eq!(got, "123@host");
    }

    #[test]
    fn extract_in_reply_to_from_bytes() {
        let raw: &[u8] = b"From: a@b.com\r\nIn-Reply-To: <parent@host>\r\n\r\nbody";
        let got = extract_in_reply_to(raw);
        assert_eq!(got, "parent@host");
    }

    #[test]
    fn extract_missing_header() {
        // neither header is present, so both extractors come back empty
        let raw: &[u8] = b"From: a@b.com\r\nSubject: hi\r\n\r\nbody";
        let message_id = extract_message_id(raw);
        let in_reply_to = extract_in_reply_to(raw);
        assert_eq!(message_id, "");
        assert_eq!(in_reply_to, "");
    }

    #[test]
    fn extract_case_insensitive() {
        let raw: &[u8] = b"message-id: <lower@host>\r\n\r\n";
        let got = extract_message_id(raw);
        assert_eq!(got, "lower@host");
    }

    #[test]
    fn normalize_only_open_bracket() {
        // an unmatched bracket is not stripped
        let got = normalize_message_id("<abc@host");
        assert_eq!(got, "<abc@host");
    }

    #[test]
    fn normalize_only_close_bracket() {
        let got = normalize_message_id("abc@host>");
        assert_eq!(got, "abc@host>");
    }

    #[test]
    fn normalize_empty_string() {
        let got = normalize_message_id("");
        assert_eq!(got, "");
    }

    #[test]
    fn normalize_empty_brackets() {
        let got = normalize_message_id("<>");
        assert_eq!(got, "");
    }

    #[test]
    fn normalize_nested_brackets() {
        // only the outermost pair is removed
        let got = normalize_message_id("<<inner>>");
        assert_eq!(got, "<inner>");
    }

    #[test]
    fn resolve_uses_lookup_result() {
        let tid = resolve_thread_id("own@host", "parent@host", |id| match id {
            "parent@host" => Some(String::from("thread-root")),
            _ => None,
        });
        assert_eq!(tid, "thread-root");
    }

    #[test]
    fn resolve_empty_own_id_with_empty_reply_to() {
        let tid = resolve_thread_id("", "", unknown_parent);
        assert_eq!(tid, "");
    }

    #[test]
    fn extract_message_id_stops_at_empty_line() {
        // the only Message-ID text sits after the blank line, i.e. in the body
        let raw: &[u8] = b"Subject: hi\r\n\r\nMessage-ID: <body@host>\r\n";
        let got = extract_message_id(raw);
        assert_eq!(got, "");
    }

    #[test]
    fn extract_message_id_upper_case_header() {
        let raw: &[u8] = b"MESSAGE-ID: <UPPER@host>\r\n\r\n";
        let got = extract_message_id(raw);
        assert_eq!(got, "UPPER@host");
    }

    #[test]
    fn extract_in_reply_to_multiple_headers() {
        // the first occurrence wins
        let raw: &[u8] = b"In-Reply-To: <first@host>\r\nIn-Reply-To: <second@host>\r\n\r\n";
        let got = extract_in_reply_to(raw);
        assert_eq!(got, "first@host");
    }

    #[test]
    fn extract_header_no_crlf() {
        // a lone header line without any line terminator
        let raw: &[u8] = b"Message-ID: <no-crlf@host>";
        let got = extract_message_id(raw);
        assert_eq!(got, "no-crlf@host");
    }

    #[test]
    fn extract_header_lf_only() {
        let raw: &[u8] = b"Message-ID: <lf@host>\n\nbody";
        let got = extract_message_id(raw);
        assert_eq!(got, "lf@host");
    }

    #[test]
    fn extract_empty_data() {
        let got = extract_message_id(b"");
        assert_eq!(got, "");
    }

    // ===== Additional corner-case tests =====

    #[test]
    fn normalize_internal_brackets_preserved() {
        // brackets away from the two ends are left alone
        let got = normalize_message_id("ab<cd>ef");
        assert_eq!(got, "ab<cd>ef");
    }

    #[test]
    fn normalize_only_whitespace() {
        // whitespace-only input trims down to nothing
        let got = normalize_message_id("   ");
        assert_eq!(got, "");
    }

    #[test]
    fn normalize_tab_and_newline_trim() {
        let got = normalize_message_id("\t<x@y>\n");
        assert_eq!(got, "x@y");
    }

    #[test]
    fn resolve_thread_id_lookup_never_called_for_empty_reply_to() {
        // fast path: with an empty in_reply_to the lookup must stay untouched
        let invoked = Cell::new(false);
        let tid = resolve_thread_id("own@host", "", |_| {
            invoked.set(true);
            Some(String::from("should-not-be-used"))
        });
        assert_eq!(tid, "own@host");
        assert!(!invoked.get(), "lookup must not be invoked for empty in_reply_to");
    }

    #[test]
    fn resolve_thread_id_lookup_called_exactly_once_for_nonempty_reply_to() {
        let calls = Cell::new(0u32);
        let _ = resolve_thread_id("own@host", "parent@host", |_| {
            calls.set(calls.get() + 1);
            Some(String::from("root@host"))
        });
        assert_eq!(calls.get(), 1);
    }

    #[test]
    fn resolve_thread_id_orphan_uses_normalized_reply_to_verbatim() {
        // on a lookup miss the in_reply_to argument is returned exactly as
        // given; normalizing it beforehand is the caller's job
        let tid = resolve_thread_id("own@host", "<not-normalized@host>", unknown_parent);
        assert_eq!(tid, "<not-normalized@host>", "orphan path does not auto-strip");
    }

    #[test]
    fn extract_header_with_leading_spaces_in_value() {
        // whitespace between the colon and the value is trimmed away
        let raw: &[u8] = b"Message-ID:      <padded@host>\r\n\r\n";
        let got = extract_message_id(raw);
        assert_eq!(got, "padded@host");
    }

    #[test]
    fn extract_header_partial_match_does_not_collide() {
        // "Message-ID-Extra" must not be mistaken for "Message-ID"
        let raw: &[u8] = b"Message-ID-Extra: nope\r\nMessage-ID: <real@host>\r\n\r\n";
        let got = extract_message_id(raw);
        assert_eq!(got, "real@host");
    }

    #[test]
    fn extract_header_value_only_colon_no_value() {
        // a header name with a colon but nothing after it yields no id
        let raw: &[u8] = b"Message-ID:\r\n\r\nbody";
        let got = extract_message_id(raw);
        assert_eq!(got, "");
    }

    #[test]
    fn extract_in_reply_to_with_brackets_normalized() {
        // In-Reply-To values get their brackets stripped as well
        let raw: &[u8] = b"In-Reply-To: <abc@def>\r\n\r\n";
        let got = extract_in_reply_to(raw);
        assert_eq!(got, "abc@def");
    }

    #[test]
    fn extract_header_invalid_utf8_falls_back_to_lossy() {
        // invalid UTF-8 in another header (Subject) must not cause a panic
        let raw: &[u8] = b"Message-ID: <ok@host>\r\nSubject: \xff\xfe garbled\r\n\r\nbody";
        let got = extract_message_id(raw);
        assert_eq!(got, "ok@host");
    }

    #[test]
    fn normalize_double_strip_idempotent() {
        // a second pass over an already normalized id changes nothing
        let first_pass = normalize_message_id("<abc@host>");
        let second_pass = normalize_message_id(first_pass);
        assert_eq!(first_pass, second_pass);
    }

    #[test]
    fn resolve_thread_id_chain_of_replies_uses_lookup_result() {
        // a small in-memory table stands in for the store a lookup would query
        let table: HashMap<String, String> = ["a@host", "b@host"]
            .iter()
            .map(|parent| (String::from(*parent), String::from("thread-1")))
            .collect();
        let via_a = resolve_thread_id("c@host", "a@host", |id| table.get(id).cloned());
        let via_b = resolve_thread_id("c@host", "b@host", |id| table.get(id).cloned());
        assert_eq!(via_a, "thread-1");
        assert_eq!(via_b, "thread-1", "two different parents in the same thread");
    }

    #[test]
    fn extract_header_only_newline_separator() {
        // header line terminated by a single LF; no blank line, no body
        let raw: &[u8] = b"Message-ID: <lf-only@host>\n";
        let got = extract_message_id(raw);
        assert_eq!(got, "lf-only@host");
    }

    #[test]
    fn extract_header_blank_data_before_header() {
        // the first blank line ends the headers, so a leading blank line
        // means there are no headers at all
        let raw: &[u8] = b"\r\nMessage-ID: <after-blank@host>\r\n";
        let got = extract_message_id(raw);
        assert_eq!(got, "");
    }
}