Skip to main content

email_extract/
types.rs

//! Core types describing a parsed email: addresses, subject, body, headers,
//! threading information, and analysis metadata.
2
3use crate::extracted::ExtractedEntities;
4use chrono::{DateTime, Utc};
5use serde::{Deserialize, Serialize};
6use std::fmt;
7
8/// A fully parsed email with extracted entities and metadata
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct Email {
11    /// Unique message ID from headers
12    pub message_id: MessageId,
13
14    /// IMAP UID
15    pub uid: u32,
16
17    /// Sender address
18    pub from: EmailAddress,
19
20    /// Primary recipients
21    pub to: Vec<EmailAddress>,
22
23    /// CC recipients
24    pub cc: Vec<EmailAddress>,
25
26    /// BCC recipients (if available)
27    pub bcc: Vec<EmailAddress>,
28
29    /// Reply-To address (if different from From)
30    pub reply_to: Option<EmailAddress>,
31
32    /// Email subject
33    pub subject: Subject,
34
35    /// Email body content
36    pub body: Body,
37
38    /// Date sent/received
39    pub date: DateTime<Utc>,
40
41    /// Email headers
42    pub headers: Headers,
43
44    /// Thread information
45    pub thread: ThreadInfo,
46
47    /// Extracted entities from the email content
48    pub extracted: ExtractedEntities,
49
50    /// Email metadata and analysis
51    pub metadata: EmailMetadata,
52}
53
54/// Message ID wrapper type
55#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
56pub struct MessageId(pub String);
57
58impl MessageId {
59    pub fn new(id: impl Into<String>) -> Self {
60        Self(id.into())
61    }
62
63    /// Generate a synthetic message ID if none provided
64    #[must_use]
65    pub fn synthetic(uid: u32) -> Self {
66        Self(format!("<synthetic-{uid}@local>"))
67    }
68
69    #[must_use]
70    pub fn as_str(&self) -> &str {
71        &self.0
72    }
73}
74
75impl fmt::Display for MessageId {
76    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
77        write!(f, "{}", self.0)
78    }
79}
80
81/// Email address with optional display name
82#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
83pub struct EmailAddress {
84    /// Display name (e.g., "John Doe")
85    pub name: Option<PersonName>,
86
87    /// Email address (e.g., "john@example.com")
88    pub address: String,
89
90    /// Domain extracted from address
91    pub domain: String,
92
93    /// Local part (before @)
94    pub local_part: String,
95}
96
97impl EmailAddress {
98    /// Parse an email address from a string
99    #[must_use]
100    pub fn parse(s: &str) -> Option<Self> {
101        let s = s.trim();
102
103        // Try to match "Name <email@domain.com>" format
104        if let Some(start) = s.find('<')
105            && let Some(end) = s.find('>')
106        {
107            let name_part = s[..start].trim().trim_matches('"');
108            let address = s[start + 1..end].trim().to_string();
109
110            if let Some((local, domain)) = address.split_once('@') {
111                return Some(Self {
112                    name: if name_part.is_empty() {
113                        None
114                    } else {
115                        Some(PersonName::parse(name_part))
116                    },
117                    local_part: local.to_string(),
118                    domain: domain.to_string(),
119                    address,
120                });
121            }
122        }
123
124        // Plain email address
125        if let Some((local, domain)) = s.split_once('@') {
126            return Some(Self {
127                name: None,
128                local_part: local.to_string(),
129                domain: domain.to_string(),
130                address: s.to_string(),
131            });
132        }
133
134        None
135    }
136
137    /// Check if this is likely a noreply/automated address
138    #[must_use]
139    pub fn is_noreply(&self) -> bool {
140        let lower = self.local_part.to_lowercase();
141        lower.contains("noreply")
142            || lower.contains("no-reply")
143            || lower.contains("donotreply")
144            || lower.contains("automated")
145            || lower.contains("mailer-daemon")
146    }
147
148    /// Check if this is from a known email service
149    #[must_use]
150    pub fn is_freemail(&self) -> bool {
151        let domain = self.domain.to_lowercase();
152        matches!(
153            domain.as_str(),
154            "gmail.com"
155                | "yahoo.com"
156                | "outlook.com"
157                | "hotmail.com"
158                | "protonmail.com"
159                | "proton.me"
160                | "icloud.com"
161                | "aol.com"
162        )
163    }
164}
165
166impl fmt::Display for EmailAddress {
167    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
168        match &self.name {
169            Some(name) => write!(f, "{} <{}>", name, self.address),
170            None => write!(f, "{}", self.address),
171        }
172    }
173}
174
175/// Parsed person name
176#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
177pub struct PersonName {
178    /// Full name as provided
179    pub full: String,
180
181    /// First name (if parseable)
182    pub first: Option<String>,
183
184    /// Last name (if parseable)
185    pub last: Option<String>,
186}
187
188impl PersonName {
189    /// Parse a name string
190    #[must_use]
191    pub fn parse(s: &str) -> Self {
192        let s = s.trim().trim_matches('"');
193        let parts: Vec<&str> = s.split_whitespace().collect();
194
195        match parts.len() {
196            0 => Self {
197                full: String::new(),
198                first: None,
199                last: None,
200            },
201            1 => Self {
202                full: parts[0].to_string(),
203                first: Some(parts[0].to_string()),
204                last: None,
205            },
206            _ => Self {
207                full: s.to_string(),
208                first: Some(parts[0].to_string()),
209                last: Some(parts[parts.len() - 1].to_string()),
210            },
211        }
212    }
213}
214
215impl fmt::Display for PersonName {
216    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
217        write!(f, "{}", self.full)
218    }
219}
220
221/// Email subject with analysis
222#[derive(Debug, Clone, Serialize, Deserialize)]
223pub struct Subject {
224    /// Original subject line
225    pub original: String,
226
227    /// Subject without <Re:/Fwd>: prefixes
228    pub normalized: String,
229
230    /// Number of Re: prefixes (indicates thread depth)
231    pub reply_depth: u32,
232
233    /// Is this a forward?
234    pub is_forward: bool,
235
236    /// Detected language (ISO 639-1 code)
237    pub language: Option<String>,
238}
239
240impl Subject {
241    /// Parse a subject line
242    #[must_use]
243    pub fn parse(s: &str) -> Self {
244        let mut normalized = s.to_string();
245        let mut reply_depth = 0;
246        let mut is_forward = false;
247
248        // Count and remove Re: prefixes
249        loop {
250            let lower = normalized.to_lowercase();
251            if lower.starts_with("re:") {
252                normalized = normalized[3..].trim_start().to_string();
253                reply_depth += 1;
254            } else if lower.starts_with("re[") {
255                // Handle Re[2]: format
256                if let Some(end) = normalized.find("]:") {
257                    if let Ok(count) = normalized[3..end].parse::<u32>() {
258                        reply_depth += count;
259                    }
260                    normalized = normalized[end + 2..].trim_start().to_string();
261                } else {
262                    break;
263                }
264            } else {
265                break;
266            }
267        }
268
269        // Check for forward
270        let lower = normalized.to_lowercase();
271        if lower.starts_with("fwd:") || lower.starts_with("fw:") {
272            is_forward = true;
273            normalized = normalized
274                .trim_start_matches(|c| {
275                    c == 'F' || c == 'f' || c == 'w' || c == 'W' || c == 'd' || c == 'D' || c == ':'
276                })
277                .trim_start()
278                .to_string();
279        }
280
281        Self {
282            original: s.to_string(),
283            normalized,
284            reply_depth,
285            is_forward,
286            language: None, // Could add language detection
287        }
288    }
289}
290
291impl fmt::Display for Subject {
292    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
293        write!(f, "{}", self.original)
294    }
295}
296
297/// Email body content
298#[derive(Debug, Clone, Serialize, Deserialize)]
299pub struct Body {
300    /// Plain text content
301    pub text: String,
302
303    /// HTML content (if available)
304    pub html: Option<String>,
305
306    /// Text extracted from HTML (if HTML-only email)
307    pub text_from_html: Option<String>,
308
309    /// Word count of text content
310    pub word_count: usize,
311
312    /// Character count
313    pub char_count: usize,
314
315    /// Line count
316    pub line_count: usize,
317
318    /// Detected language
319    pub language: Option<String>,
320
321    /// Has attachments indicator from content type
322    pub has_attachments: bool,
323
324    /// Signature block (if detected and separated)
325    pub signature: Option<String>,
326
327    /// Main content without signature
328    pub content_without_signature: String,
329}
330
331impl Body {
332    /// Check if body is empty or minimal
333    #[must_use]
334    pub fn is_empty(&self) -> bool {
335        self.text.trim().is_empty() && self.html.is_none()
336    }
337
338    /// Get the best available text content
339    #[must_use]
340    pub fn best_text(&self) -> &str {
341        if !self.text.is_empty() {
342            &self.text
343        } else if let Some(ref html_text) = self.text_from_html {
344            html_text
345        } else {
346            ""
347        }
348    }
349}
350
351/// Email headers
352#[derive(Debug, Clone, Serialize, Deserialize)]
353pub struct Headers {
354    /// All headers as key-value pairs
355    pub all: Vec<(String, String)>,
356
357    /// Content-Type
358    pub content_type: Option<String>,
359
360    /// X-Mailer or User-Agent
361    pub mailer: Option<String>,
362
363    /// X-Priority
364    pub priority: Option<Priority>,
365
366    /// List-Unsubscribe header (newsletters)
367    pub list_unsubscribe: Option<String>,
368
369    /// Authentication results
370    pub authentication: AuthenticationResults,
371
372    /// Custom headers (X-*)
373    pub custom: Vec<(String, String)>,
374}
375
376/// Email priority level
377#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
378pub enum Priority {
379    Highest,
380    High,
381    Normal,
382    Low,
383    Lowest,
384}
385
386impl Priority {
387    #[must_use]
388    pub fn from_header(value: &str) -> Self {
389        match value.trim() {
390            "1" => Self::Highest,
391            "2" => Self::High,
392            "4" => Self::Low,
393            "5" => Self::Lowest,
394            _ => Self::Normal,
395        }
396    }
397}
398
399/// Email authentication results
400#[derive(Debug, Clone, Default, Serialize, Deserialize)]
401pub struct AuthenticationResults {
402    /// SPF result
403    pub spf: Option<AuthResult>,
404
405    /// DKIM result
406    pub dkim: Option<AuthResult>,
407
408    /// DMARC result
409    pub dmarc: Option<AuthResult>,
410}
411
412/// Authentication result status
413#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
414pub enum AuthResult {
415    Pass,
416    Fail,
417    Neutral,
418    None,
419    Unknown(String),
420}
421
422/// Thread information
423#[derive(Debug, Clone, Serialize, Deserialize)]
424pub struct ThreadInfo {
425    /// In-Reply-To header (message ID of parent)
426    pub in_reply_to: Option<MessageId>,
427
428    /// References header (list of message IDs in thread)
429    pub references: Vec<MessageId>,
430
431    /// Is this a reply?
432    pub is_reply: bool,
433
434    /// Estimated position in thread
435    pub thread_position: u32,
436}
437
438/// Email metadata and analysis results
439#[derive(Debug, Clone, Serialize, Deserialize)]
440pub struct EmailMetadata {
441    /// Spam indicators score (0.0 = clean, 1.0 = spam)
442    pub spam_score: f32,
443
444    /// List of spam indicators found
445    pub spam_indicators: Vec<SpamIndicator>,
446
447    /// Urgency indicators
448    pub urgency: Urgency,
449
450    /// Email category hints
451    pub category_hints: Vec<CategoryHint>,
452
453    /// Is this likely automated/bulk mail?
454    pub is_automated: bool,
455
456    /// Is this from a mailing list?
457    pub is_mailing_list: bool,
458
459    /// Sentiment hints (positive, negative, neutral)
460    pub sentiment: Sentiment,
461}
462
463/// Spam indicator
464#[derive(Debug, Clone, Serialize, Deserialize)]
465pub struct SpamIndicator {
466    pub indicator: String,
467    pub weight: f32,
468}
469
470/// Urgency level
471#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
472pub enum Urgency {
473    Critical,
474    High,
475    Normal,
476    Low,
477}
478
479/// Category hint for email classification
480#[derive(Debug, Clone, Serialize, Deserialize)]
481pub struct CategoryHint {
482    pub category: String,
483    pub confidence: f32,
484    pub reason: String,
485}
486
487/// Sentiment analysis result
488#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
489pub enum Sentiment {
490    Positive,
491    Negative,
492    #[default]
493    Neutral,
494    Mixed,
495}