1use anyhow::{Context, Result, bail};
56use chrono::DateTime;
57use mailparse::{MailHeaderMap, parse_headers};
58
/// Commit metadata recovered from a single mbox-formatted patch
/// (the format produced by `git format-patch`).
///
/// `Eq` is derived alongside `PartialEq` because every field has total
/// equality; this lets the type be used wherever an `Eq` bound is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PatchMetadata {
    /// Second whitespace-separated token of the leading mbox `From ` line.
    pub commit_id: String,
    /// Display name taken from the `From:` header.
    pub author_name: String,
    /// Address taken from the `From:` header.
    pub author_email: String,
    /// Author date from the `Date:` header, as seconds since the Unix epoch.
    pub author_timestamp: i64,
    /// UTC offset of the author date in minutes (`+0500` becomes `300`).
    pub author_offset_minutes: i32,
    /// Committer date, if known. `parse_mbox_patch` always leaves this `None`.
    pub committer_timestamp: Option<i64>,
    /// Subject line with `Re:` / `[PATCH ...]` style prefixes removed.
    pub subject: String,
    /// Commit message body (everything after the subject, before the diff).
    pub body: String,
}
70
71pub fn parse_mbox_patch(content: &str) -> Result<PatchMetadata> {
72 let commit_id = extract_commit_id_from_mbox(content)?;
73 let (author_name, author_email) = extract_author_from_from_header(content)?;
74 let (author_timestamp, author_offset_minutes) = extract_date_from_header(content)?;
75 let committer_timestamp = None;
76 let subject = extract_subject(content)?;
77 let body = extract_commit_message_body(content)?;
78
79 Ok(PatchMetadata {
80 commit_id,
81 author_name,
82 author_email,
83 author_timestamp,
84 author_offset_minutes,
85 committer_timestamp,
86 subject,
87 body,
88 })
89}
90
91fn extract_commit_id_from_mbox(content: &str) -> Result<String> {
100 if !content.starts_with("From ") {
101 bail!("patch does not start with 'From ' - not a valid mbox format");
102 }
103
104 let first_line = content.lines().next().context("patch content is empty")?;
105
106 let parts: Vec<&str> = first_line.split_whitespace().collect();
107 if parts.len() < 2 {
108 bail!("mbox 'From ' line does not contain a commit id");
109 }
110
111 Ok(parts[1].to_string())
112}
113
/// Returns the header portion of an mbox message: everything after the first
/// line (the `From ` envelope) up to and including the first blank line.
///
/// Yields `""` when `content` has no newline at all, and the whole remainder
/// when no blank line (`"\n\n"`) follows the envelope line.
fn extract_header_section(content: &str) -> &str {
    let Some((_envelope, rest)) = content.split_once('\n') else {
        return "";
    };

    match rest.find("\n\n") {
        // Keep both newlines so the slice ends with the blank separator line.
        Some(blank_at) => &rest[..blank_at + 2],
        None => rest,
    }
}
131
132fn extract_author_from_from_header(content: &str) -> Result<(String, String)> {
133 let header_bytes = extract_header_section(content).as_bytes();
134 if let Ok((headers, _)) = parse_headers(header_bytes) {
135 if let Some(from_value) = headers.get_first_value("From") {
136 return parse_from_header_value(&from_value);
137 }
138 }
139
140 let from_line = content
142 .lines()
143 .find(|line| line.starts_with("From:"))
144 .context("patch does not contain a 'From:' header")?;
145
146 let from_value = from_line
147 .strip_prefix("From:")
148 .context("failed to strip 'From:' prefix")?
149 .trim();
150
151 parse_from_header_value(from_value)
152}
153
154fn parse_from_header_value(value: &str) -> Result<(String, String)> {
155 if let Some(start) = value.find('<') {
156 if let Some(end) = value.find('>') {
157 let email = value[start + 1..end].to_string();
158 let name_part = value[..start].trim();
159 let name = name_part.trim_matches('"').trim().to_string();
160 return Ok((name, email));
161 }
162 }
163
164 if value.contains('@') {
165 let email = value.trim().to_string();
166 let name = email.split('@').next().unwrap_or("unknown").to_string();
167 return Ok((name, email));
168 }
169
170 bail!("could not parse From header: {}", value)
171}
172
173fn extract_date_from_header(content: &str) -> Result<(i64, i32)> {
174 let date_line = content
175 .lines()
176 .find(|line| line.starts_with("Date:"))
177 .context("patch does not contain a 'Date:' header")?;
178
179 let date_value = date_line
180 .strip_prefix("Date:")
181 .context("failed to strip 'Date:' prefix")?
182 .trim();
183
184 parse_rfc2822_date(date_value)
185}
186
187fn parse_rfc2822_date(value: &str) -> Result<(i64, i32)> {
188 let parsed = DateTime::parse_from_rfc2822(value)
189 .context(format!("failed to parse RFC2822 date: {}", value))?;
190
191 let timestamp = parsed.timestamp();
192 let offset_minutes = parsed.offset().local_minus_utc() / 60;
193
194 Ok((timestamp, offset_minutes))
195}
196
197fn extract_subject(content: &str) -> Result<String> {
198 let header_bytes = extract_header_section(content).as_bytes();
200 if let Ok((headers, _)) = parse_headers(header_bytes) {
201 if let Some(subject_value) = headers.get_first_value("Subject") {
202 return Ok(cleanup_subject(&subject_value));
203 }
204 }
205
206 let subject_line = content
208 .lines()
209 .find(|line| line.starts_with("Subject:"))
210 .context("patch does not contain a 'Subject:' header")?;
211
212 let subject_value = subject_line
213 .strip_prefix("Subject:")
214 .context("failed to strip 'Subject:' prefix")?
215 .trim();
216
217 Ok(cleanup_subject(subject_value))
218}
219
/// Strips mail cruft from the front of a subject line.
///
/// The following are removed repeatedly until none remain:
///
/// * leading and trailing whitespace,
/// * a leading `Re:` in any letter case (`Re:`, `re:`, `RE:`, ...),
/// * a leading `:`,
/// * a leading bracketed tag such as `[PATCH v2 1/3]`.
///
/// The result is always trimmed, including when no prefix was present.
fn cleanup_subject(subject: &str) -> String {
    // Work on a shrinking slice and allocate once at the end.
    let mut rest = subject.trim();

    loop {
        // `get(..3)` is `None` for short input or when byte 3 is not a char
        // boundary, so multi-byte subjects never cause a slicing panic. A
        // match implies three ASCII bytes, making `rest[3..]` safe.
        if rest
            .get(..3)
            .is_some_and(|prefix| prefix.eq_ignore_ascii_case("re:"))
        {
            rest = rest[3..].trim();
            continue;
        }

        if let Some(stripped) = rest.strip_prefix(':') {
            rest = stripped.trim();
            continue;
        }

        if rest.starts_with('[') {
            if let Some(end) = rest.find(']') {
                rest = rest[end + 1..].trim();
                continue;
            }
        }

        break;
    }

    rest.to_string()
}
248
249fn extract_commit_message_body(content: &str) -> Result<String> {
250 let mut in_body = false;
251 let mut body_lines: Vec<String> = Vec::new();
252 let mut found_first_content = false;
253
254 for line in content.lines() {
255 if !in_body {
256 if line.is_empty() {
257 in_body = true;
258 }
259 continue;
260 }
261
262 if line.starts_with("diff --git ")
263 || line.starts_with("Index: ")
264 || line.starts_with("--- ")
265 || line.starts_with("From ")
266 {
267 break;
268 }
269
270 if line.starts_with("---") && line.trim().eq("---") {
271 break;
272 }
273
274 if line == "-- " {
278 break;
279 }
280
281 if !found_first_content && line.trim().is_empty() {
282 continue;
283 }
284
285 found_first_content = true;
286 body_lines.push(line.to_string());
287 }
288
289 while body_lines.last().is_some_and(|l| l.trim().is_empty()) {
290 body_lines.pop();
291 }
292
293 Ok(body_lines.join("\n").trim().to_string())
294}
295
296pub fn extract_description_from_patch(content: &str) -> Result<String> {
297 let subject = extract_subject(content)?;
298 let body = extract_commit_message_body(content)?;
299
300 if body.is_empty() {
301 Ok(subject)
302 } else {
303 Ok(format!("{}\n\n{}", subject, body))
304 }
305}
306
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE: the mail signature separator is "-- " (two dashes and a space).
    // The trailing space is written as `\x20` in the fixtures below so that it
    // stays visible and survives editors that strip trailing whitespace.

    /// A complete single-commit patch in `git format-patch` layout.
    fn sample_patch() -> String {
        "\
From 431b84edc0d2fa118d63faa3c2db9c73d630a5ae Mon Sep 17 00:00:00 2001
From: Joe Bloggs <joe.bloggs@pm.me>
Date: Thu, 1 Jan 1970 00:00:00 +0000
Subject: [PATCH] add t2.md

This is the commit message body.

It can have multiple lines.

---
 t2.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 t2.md

diff --git a/t2.md b/t2.md
new file mode 100644
index 0000000..a66525d
--- /dev/null
+++ b/t2.md
@@ -0,0 +1 @@
+some content1
\\ No newline at end of file
--\x20
libgit2 1.9.2

"
        .to_string()
    }

    #[test]
    fn parse_commit_id() {
        let patch = sample_patch();
        let result = extract_commit_id_from_mbox(&patch).unwrap();
        assert_eq!(result, "431b84edc0d2fa118d63faa3c2db9c73d630a5ae");
    }

    #[test]
    fn parse_author() {
        let patch = sample_patch();
        let (name, email) = extract_author_from_from_header(&patch).unwrap();
        assert_eq!(name, "Joe Bloggs");
        assert_eq!(email, "joe.bloggs@pm.me");
    }

    // Surrounding double quotes are removed from the display name.
    #[test]
    fn parse_author_with_quoted_name() {
        let patch = "\
From abc123 Mon Sep 17 00:00:00 2001
From: \"John (nickname) Doe\" <john.doe@example.com>
Date: Thu, 1 Jan 1970 00:00:00 +0000
Subject: test

Body
";
        let (name, email) = extract_author_from_from_header(patch).unwrap();
        assert_eq!(name, "John (nickname) Doe");
        assert_eq!(email, "john.doe@example.com");
    }

    // A bare address uses its local part as the author name.
    #[test]
    fn parse_author_email_only() {
        let patch = "\
From abc123 Mon Sep 17 00:00:00 2001
From: john.doe@example.com
Date: Thu, 1 Jan 1970 00:00:00 +0000
Subject: test

Body
";
        let (name, email) = extract_author_from_from_header(patch).unwrap();
        assert_eq!(name, "john.doe");
        assert_eq!(email, "john.doe@example.com");
    }

    #[test]
    fn parse_date() {
        let patch = sample_patch();
        let (timestamp, offset) = extract_date_from_header(&patch).unwrap();
        assert_eq!(timestamp, 0);
        assert_eq!(offset, 0);
    }

    // Midnight at +05:00 is five hours before the epoch in UTC.
    #[test]
    fn parse_date_with_timezone() {
        let patch = "\
From abc123 Mon Sep 17 00:00:00 2001
From: Joe <joe@example.com>
Date: Thu, 1 Jan 1970 00:00:00 +0500
Subject: test

Body
";
        let (timestamp, offset) = extract_date_from_header(patch).unwrap();
        assert_eq!(timestamp, -18000);
        assert_eq!(offset, 300);
    }

    #[test]
    fn parse_subject() {
        let patch = sample_patch();
        let subject = extract_subject(&patch).unwrap();
        assert_eq!(subject, "add t2.md");
    }

    #[test]
    fn parse_subject_with_patch_prefix() {
        let patch = "\
From abc123 Mon Sep 17 00:00:00 2001
From: Joe <joe@example.com>
Date: Thu, 1 Jan 1970 00:00:00 +0000
Subject: [PATCH v2 3/5] fix: important bug

Body
";
        let subject = extract_subject(patch).unwrap();
        assert_eq!(subject, "fix: important bug");
    }

    #[test]
    fn parse_subject_with_re_prefix() {
        let patch = "\
From abc123 Mon Sep 17 00:00:00 2001
From: Joe <joe@example.com>
Date: Thu, 1 Jan 1970 00:00:00 +0000
Subject: Re: [PATCH] fix: important bug

Body
";
        let subject = extract_subject(patch).unwrap();
        assert_eq!(subject, "fix: important bug");
    }

    // A header continued on an indented line must be unfolded into one line.
    #[test]
    fn parse_subject_folded_rfc2822() {
        let patch = "\
From abc123 Mon Sep 17 00:00:00 2001
From: Joe <joe@example.com>
Date: Thu, 1 Jan 1970 00:00:00 +0000
Subject: [PATCH] fix: this is a very long commit message subject line
 that has been folded across two lines by RFC 2822 rules

Body
";
        let subject = extract_subject(patch).unwrap();
        assert_eq!(
            subject,
            "fix: this is a very long commit message subject line that has been folded across two lines by RFC 2822 rules"
        );
    }

    // MIME "Q" (quoted-printable) encoded-word in the subject.
    #[test]
    fn parse_subject_mime_q_encoded() {
        let patch = "\
From abc123 Mon Sep 17 00:00:00 2001
From: Joe <joe@example.com>
Date: Thu, 1 Jan 1970 00:00:00 +0000
Subject: [PATCH] =?UTF-8?q?fix=3A_add_=E2=9C=93_check?=

Body
";
        let subject = extract_subject(patch).unwrap();
        assert_eq!(subject, "fix: add \u{2713} check");
    }

    // MIME "B" (base64) encoded-word in the subject.
    #[test]
    fn parse_subject_mime_b_encoded() {
        let patch = "\
From abc123 Mon Sep 17 00:00:00 2001
From: Joe <joe@example.com>
Date: Thu, 1 Jan 1970 00:00:00 +0000
Subject: [PATCH] =?UTF-8?b?Zml4OiByw6lzdW3DqQ==?=

Body
";
        let subject = extract_subject(patch).unwrap();
        assert_eq!(subject, "fix: r\u{e9}sum\u{e9}");
    }

    #[test]
    fn parse_body() {
        let patch = sample_patch();
        let body = extract_commit_message_body(&patch).unwrap();
        assert_eq!(
            body,
            "This is the commit message body.\n\nIt can have multiple lines."
        );
    }

    #[test]
    fn parse_body_empty() {
        let patch = "\
From abc123 Mon Sep 17 00:00:00 2001
From: Joe <joe@example.com>
Date: Thu, 1 Jan 1970 00:00:00 +0000
Subject: test

---
 file.txt | 1 +
diff --git a/file.txt b/file.txt
";
        let body = extract_commit_message_body(patch).unwrap();
        assert_eq!(body, "");
    }

    // Only the exact separator "-- " (with trailing space) ends the body.
    #[test]
    fn parse_body_stops_at_exact_email_sig_separator() {
        let patch = "\
From abc123 Mon Sep 17 00:00:00 2001
From: Joe <joe@example.com>
Date: Thu, 1 Jan 1970 00:00:00 +0000
Subject: [PATCH] test

This is the body.
--\x20
libgit2 1.9.2

diff --git a/file.txt b/file.txt
";
        let body = extract_commit_message_body(patch).unwrap();
        assert_eq!(body, "This is the body.");
    }

    // "-- " followed by more text is ordinary body content.
    #[test]
    fn parse_body_does_not_stop_at_double_dash_with_text() {
        let patch = "\
From abc123 Mon Sep 17 00:00:00 2001
From: Joe <joe@example.com>
Date: Thu, 1 Jan 1970 00:00:00 +0000
Subject: [PATCH] test

This is the body.
-- some CLI flag description
More body text.

---
diff --git a/file.txt b/file.txt
";
        let body = extract_commit_message_body(patch).unwrap();
        assert_eq!(
            body,
            "This is the body.\n-- some CLI flag description\nMore body text."
        );
    }

    #[test]
    fn parse_full_metadata() {
        let patch = sample_patch();
        let metadata = parse_mbox_patch(&patch).unwrap();

        assert_eq!(
            metadata.commit_id,
            "431b84edc0d2fa118d63faa3c2db9c73d630a5ae"
        );
        assert_eq!(metadata.author_name, "Joe Bloggs");
        assert_eq!(metadata.author_email, "joe.bloggs@pm.me");
        assert_eq!(metadata.author_timestamp, 0);
        assert_eq!(metadata.author_offset_minutes, 0);
        assert_eq!(metadata.committer_timestamp, None);
        assert_eq!(metadata.subject, "add t2.md");
        assert_eq!(
            metadata.body,
            "This is the commit message body.\n\nIt can have multiple lines."
        );
    }

    #[test]
    fn extract_description_combines_subject_and_body() {
        let patch = sample_patch();
        let description = extract_description_from_patch(&patch).unwrap();
        assert_eq!(
            description,
            "add t2.md\n\nThis is the commit message body.\n\nIt can have multiple lines."
        );
    }

    // With an empty body the description is the subject alone.
    #[test]
    fn extract_description_subject_only() {
        let patch = "\
From abc123 Mon Sep 17 00:00:00 2001
From: Joe <joe@example.com>
Date: Thu, 1 Jan 1970 00:00:00 +0000
Subject: [PATCH] simple fix

---
 file.txt | 1 +
";
        let description = extract_description_from_patch(patch).unwrap();
        assert_eq!(description, "simple fix");
    }

    #[test]
    fn cleanup_subject_strips_patch_prefixes() {
        assert_eq!(cleanup_subject("[PATCH] test"), "test");
        assert_eq!(cleanup_subject("[PATCH v2] test"), "test");
        assert_eq!(cleanup_subject("[PATCH 1/3] test"), "test");
        assert_eq!(cleanup_subject("[PATCH v2 1/3] test"), "test");
        assert_eq!(cleanup_subject("Re: [PATCH] test"), "test");
        assert_eq!(cleanup_subject("re: test"), "test");
        assert_eq!(cleanup_subject(":test"), "test");
    }
}