Skip to main content

rusmes_proto/
mime.rs

1//! MIME parsing support for RFC 5322 and RFC 2045
2//!
3//! This module provides:
4//! - Header folding/unfolding per RFC 5322
5//! - Content-Transfer-Encoding decoding (base64, quoted-printable)
6//! - MIME multipart parsing
7//! - Content-Type header parsing
8
9use crate::error::{MailError, Result};
10use std::collections::HashMap;
11
/// The transfer encodings defined by RFC 2045 for a MIME entity body.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ContentTransferEncoding {
    /// `7bit`: plain 7-bit ASCII data
    SevenBit,
    /// `8bit`: 8-bit data
    EightBit,
    /// `binary`: arbitrary binary data
    Binary,
    /// `quoted-printable`
    QuotedPrintable,
    /// `base64`
    Base64,
}

impl ContentTransferEncoding {
    /// Map a Content-Transfer-Encoding token to its variant.
    ///
    /// The comparison is case-insensitive. The input is not trimmed, and any
    /// token that is not recognised yields [`ContentTransferEncoding::SevenBit`].
    pub fn parse(s: &str) -> Self {
        const NAMED: [(&str, ContentTransferEncoding); 4] = [
            ("quoted-printable", ContentTransferEncoding::QuotedPrintable),
            ("base64", ContentTransferEncoding::Base64),
            ("8bit", ContentTransferEncoding::EightBit),
            ("binary", ContentTransferEncoding::Binary),
        ];

        let lowered = s.to_lowercase();
        NAMED
            .iter()
            .find(|(name, _)| *name == lowered)
            .map_or(Self::SevenBit, |&(_, encoding)| encoding)
    }
}
39
40/// Content-Type header parsed per RFC 2045
41#[derive(Debug, Clone)]
42pub struct ContentType {
43    /// Main type (e.g., "text", "multipart")
44    pub main_type: String,
45    /// Sub type (e.g., "plain", "mixed")
46    pub sub_type: String,
47    /// Parameters (e.g., charset, boundary)
48    pub parameters: HashMap<String, String>,
49}
50
51impl ContentType {
52    /// Parse Content-Type header value
53    pub fn parse(value: &str) -> Result<Self> {
54        let value = value.trim();
55
56        // Find the semicolon that separates type from parameters
57        let (type_part, params_part) = if let Some(pos) = value.find(';') {
58            (&value[..pos], &value[pos + 1..])
59        } else {
60            (value, "")
61        };
62
63        // Parse main/sub type
64        let (main_type, sub_type) = if let Some(pos) = type_part.find('/') {
65            let main = type_part[..pos].trim().to_lowercase();
66            let sub = type_part[pos + 1..].trim().to_lowercase();
67            (main, sub)
68        } else {
69            return Err(MailError::Parse(format!(
70                "Invalid Content-Type format: {}",
71                value
72            )));
73        };
74
75        // Parse parameters
76        let mut parameters = HashMap::new();
77        for param in params_part.split(';') {
78            let param = param.trim();
79            if param.is_empty() {
80                continue;
81            }
82
83            if let Some(pos) = param.find('=') {
84                let key = param[..pos].trim().to_lowercase();
85                let mut val = param[pos + 1..].trim();
86
87                // Remove quotes if present
88                if val.starts_with('"') && val.ends_with('"') && val.len() >= 2 {
89                    val = &val[1..val.len() - 1];
90                }
91
92                parameters.insert(key, val.to_string());
93            }
94        }
95
96        Ok(ContentType {
97            main_type,
98            sub_type,
99            parameters,
100        })
101    }
102
103    /// Get boundary parameter for multipart messages
104    pub fn boundary(&self) -> Option<&str> {
105        self.parameters.get("boundary").map(|s| s.as_str())
106    }
107
108    /// Get charset parameter
109    pub fn charset(&self) -> Option<&str> {
110        self.parameters.get("charset").map(|s| s.as_str())
111    }
112
113    /// Check if this is a multipart type
114    pub fn is_multipart(&self) -> bool {
115        self.main_type == "multipart"
116    }
117}
118
119/// A single part in a MIME multipart message
120#[derive(Debug, Clone)]
121pub struct MimePart {
122    /// Headers for this part
123    pub headers: HashMap<String, String>,
124    /// Body content (raw bytes)
125    pub body: Vec<u8>,
126}
127
128impl MimePart {
129    /// Get Content-Type for this part
130    pub fn content_type(&self) -> Result<Option<ContentType>> {
131        if let Some(ct) = self.headers.get("content-type") {
132            Ok(Some(ContentType::parse(ct)?))
133        } else {
134            Ok(None)
135        }
136    }
137
138    /// Get Content-Transfer-Encoding for this part
139    pub fn content_transfer_encoding(&self) -> ContentTransferEncoding {
140        if let Some(cte) = self.headers.get("content-transfer-encoding") {
141            ContentTransferEncoding::parse(cte.trim())
142        } else {
143            ContentTransferEncoding::SevenBit
144        }
145    }
146
147    /// Decode the body according to Content-Transfer-Encoding
148    pub fn decode_body(&self) -> Result<Vec<u8>> {
149        let encoding = self.content_transfer_encoding();
150
151        match encoding {
152            ContentTransferEncoding::Base64 => decode_base64(&self.body),
153            ContentTransferEncoding::QuotedPrintable => decode_quoted_printable(&self.body),
154            _ => Ok(self.body.clone()),
155        }
156    }
157}
158
/// Undo RFC 5322 (section 2.2.3) header folding.
///
/// Every CR and LF in `value` is dropped. When a space or tab directly follows
/// an LF, that character and the run of spaces/tabs after it are replaced by
/// one single space. All other characters are copied through unchanged.
pub fn unfold_header(value: &str) -> String {
    let mut unfolded = String::with_capacity(value.len());
    // True while the most recent character was an LF (with or without a CR before it).
    let mut after_line_feed = false;
    // True while swallowing the whitespace run that follows a fold point.
    let mut inside_fold = false;

    for ch in value.chars() {
        let is_wsp = ch == ' ' || ch == '\t';

        if inside_fold {
            if is_wsp {
                continue;
            }
            inside_fold = false;
        }

        match ch {
            '\r' => after_line_feed = false,
            '\n' => after_line_feed = true,
            _ if is_wsp && after_line_feed => {
                unfolded.push(' ');
                inside_fold = true;
                after_line_feed = false;
            }
            _ => {
                unfolded.push(ch);
                after_line_feed = false;
            }
        }
    }

    unfolded
}
212
/// Fold a header value per RFC 5322
///
/// Lines should not exceed `max_len` bytes (RFC 5322 recommends 78).
/// Folding is done by inserting CRLF immediately *before* an existing space
/// or tab, so the original whitespace becomes the start of the continuation
/// line and no characters are added or removed apart from the CRLF itself.
///
/// A fold is only made at whitespace that has non-whitespace text before it on
/// the current line, so a fold never creates an empty or whitespace-only line
/// in front of it. A run of text with no whitespace cannot be folded and is
/// emitted as-is even when it is longer than `max_len`.
///
/// Values that already fit in `max_len` bytes are returned unchanged.
/// `max_len == 0` is accepted and folds at every possible fold point.
pub fn fold_header(value: &str, max_len: usize) -> String {
    if value.len() <= max_len {
        return value.to_string();
    }

    // `max(1)` keeps the capacity estimate from dividing by zero.
    let mut folded = String::with_capacity(value.len() + 2 * (value.len() / max_len.max(1)));
    // Byte offset in `value` where the current output line begins.
    let mut line_start = 0;
    // Byte offset of the latest whitespace on this line that may start a new line.
    let mut fold_at: Option<usize> = None;
    // Whether the current line already holds a non-whitespace character.
    let mut seen_text = false;

    for (idx, ch) in value.char_indices() {
        if ch == ' ' || ch == '\t' {
            if seen_text {
                fold_at = Some(idx);
            }
        } else {
            seen_text = true;
        }

        let line_len = idx + ch.len_utf8() - line_start;
        if line_len > max_len {
            if let Some(ws) = fold_at.take() {
                folded.push_str(&value[line_start..ws]);
                folded.push_str("\r\n");
                line_start = ws;
                // Everything between the fold point and the current character
                // is text; if we folded right at the current character the new
                // line holds only that whitespace so far.
                seen_text = ws < idx;
            }
        }
    }

    folded.push_str(&value[line_start..]);
    folded
}
248
249/// Parse headers from raw message data
250///
251/// Handles header folding per RFC 5322.
252/// Returns a map of header names (lowercase) to values.
253pub fn parse_headers(data: &[u8]) -> Result<(HashMap<String, String>, usize)> {
254    let mut headers = HashMap::new();
255    let mut pos = 0;
256    let mut current_header: Option<(String, String)> = None;
257
258    let mut line_start = 0;
259    let data_len = data.len();
260
261    while pos < data_len {
262        // Find line ending
263        let line_end = if pos + 1 < data_len && data[pos] == b'\r' && data[pos + 1] == b'\n' {
264            pos += 2;
265            pos - 2
266        } else if pos < data_len && data[pos] == b'\n' {
267            pos += 1;
268            pos - 1
269        } else {
270            pos += 1;
271            continue;
272        };
273
274        let line = &data[line_start..line_end];
275
276        // Empty line signals end of headers
277        if line.is_empty() {
278            if let Some((name, value)) = current_header.take() {
279                headers.insert(name, unfold_header(&value));
280            }
281            break;
282        }
283
284        // Check if this is a continuation line (starts with whitespace)
285        if !line.is_empty() && (line[0] == b' ' || line[0] == b'\t') {
286            if let Some((_, ref mut value)) = current_header {
287                let line_str = String::from_utf8_lossy(line);
288                value.push_str(&line_str);
289            }
290        } else {
291            // New header line
292            if let Some((name, value)) = current_header.take() {
293                headers.insert(name, unfold_header(&value));
294            }
295
296            // Parse header name and value
297            if let Some(colon_pos) = line.iter().position(|&b| b == b':') {
298                let name = String::from_utf8_lossy(&line[..colon_pos])
299                    .trim()
300                    .to_lowercase();
301                let value = String::from_utf8_lossy(&line[colon_pos + 1..]).to_string();
302                current_header = Some((name, value));
303            }
304        }
305
306        line_start = pos;
307    }
308
309    // Don't forget the last header
310    if let Some((name, value)) = current_header {
311        headers.insert(name, unfold_header(&value));
312    }
313
314    Ok((headers, pos))
315}
316
317/// Split a multipart MIME message into its parts
318pub fn split_multipart(body: &[u8], boundary: &str) -> Result<Vec<MimePart>> {
319    let mut parts = Vec::new();
320
321    // Construct boundary markers
322    let start_boundary = format!("--{}", boundary);
323    let end_boundary = format!("--{}--", boundary);
324
325    let start_marker = start_boundary.as_bytes();
326    let end_marker = end_boundary.as_bytes();
327
328    let mut pos = 0;
329    let body_len = body.len();
330
331    // Find first boundary
332    while pos < body_len {
333        if body[pos..].starts_with(start_marker) {
334            pos += start_marker.len();
335            // Skip to end of line
336            while pos < body_len && body[pos] != b'\n' {
337                pos += 1;
338            }
339            if pos < body_len {
340                pos += 1; // Skip the \n
341            }
342            break;
343        }
344        pos += 1;
345    }
346
347    // Parse each part
348    loop {
349        if pos >= body_len {
350            break;
351        }
352
353        let part_start = pos;
354
355        // Find next boundary
356        let mut next_boundary_pos = None;
357        let mut is_end = false;
358
359        let mut search_pos = pos;
360        while search_pos < body_len {
361            if body[search_pos..].starts_with(end_marker) {
362                next_boundary_pos = Some(search_pos);
363                is_end = true;
364                break;
365            } else if body[search_pos..].starts_with(start_marker) {
366                next_boundary_pos = Some(search_pos);
367                break;
368            }
369            search_pos += 1;
370        }
371
372        if let Some(boundary_pos) = next_boundary_pos {
373            // Extract part data (excluding the boundary)
374            let part_data = &body[part_start..boundary_pos];
375
376            // Parse headers and body for this part
377            let (part_headers, headers_end) = parse_headers(part_data)?;
378            let part_body = if headers_end < part_data.len() {
379                part_data[headers_end..].to_vec()
380            } else {
381                Vec::new()
382            };
383
384            // Trim trailing CRLF from body
385            let part_body = trim_trailing_crlf(&part_body);
386
387            parts.push(MimePart {
388                headers: part_headers,
389                body: part_body,
390            });
391
392            if is_end {
393                break;
394            }
395
396            // Move past boundary
397            pos = boundary_pos + start_marker.len();
398            while pos < body_len && body[pos] != b'\n' {
399                pos += 1;
400            }
401            if pos < body_len {
402                pos += 1;
403            }
404        } else {
405            break;
406        }
407    }
408
409    Ok(parts)
410}
411
/// Copy `data` without its trailing line breaks.
///
/// Repeatedly removes a trailing CRLF pair or a trailing bare LF until neither
/// is left. A trailing CR that is not followed by LF is kept.
fn trim_trailing_crlf(data: &[u8]) -> Vec<u8> {
    let mut kept = data;

    loop {
        kept = if let Some(rest) = kept.strip_suffix(b"\r\n") {
            rest
        } else if let Some(rest) = kept.strip_suffix(b"\n") {
            rest
        } else {
            break;
        };
    }

    kept.to_vec()
}
428
429/// Decode Base64 content per RFC 2045
430pub fn decode_base64(data: &[u8]) -> Result<Vec<u8>> {
431    // Filter out whitespace and newlines as per RFC 2045
432    let filtered: Vec<u8> = data
433        .iter()
434        .copied()
435        .filter(|&b| !matches!(b, b'\r' | b'\n' | b' ' | b'\t'))
436        .collect();
437
438    // Simple base64 decoding implementation
439    let alphabet = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
440    let mut decode_table = [255u8; 256];
441    for (i, &ch) in alphabet.iter().enumerate() {
442        decode_table[ch as usize] = i as u8;
443    }
444
445    let mut result = Vec::with_capacity(filtered.len() * 3 / 4);
446    let mut i = 0;
447
448    while i + 4 <= filtered.len() {
449        let b0 = filtered[i];
450        let b1 = filtered[i + 1];
451        let b2 = filtered[i + 2];
452        let b3 = filtered[i + 3];
453
454        let v0 = decode_table[b0 as usize];
455        let v1 = decode_table[b1 as usize];
456        let v2 = if b2 == b'=' {
457            0
458        } else {
459            decode_table[b2 as usize]
460        };
461        let v3 = if b3 == b'=' {
462            0
463        } else {
464            decode_table[b3 as usize]
465        };
466
467        if v0 == 255 || v1 == 255 {
468            return Err(MailError::Parse("Invalid base64 character".to_string()));
469        }
470
471        result.push((v0 << 2) | (v1 >> 4));
472
473        if b2 != b'=' {
474            result.push((v1 << 4) | (v2 >> 2));
475        }
476
477        if b3 != b'=' {
478            result.push((v2 << 6) | v3);
479        }
480
481        i += 4;
482    }
483
484    Ok(result)
485}
486
487/// Decode quoted-printable content per RFC 2045
488pub fn decode_quoted_printable(data: &[u8]) -> Result<Vec<u8>> {
489    let mut result = Vec::with_capacity(data.len());
490    let mut i = 0;
491
492    while i < data.len() {
493        if data[i] == b'=' {
494            if i + 2 < data.len() {
495                let c1 = data[i + 1];
496                let c2 = data[i + 2];
497
498                // Soft line break (=CRLF or =LF)
499                if c1 == b'\r' && i + 3 < data.len() && data[i + 2] == b'\n' {
500                    i += 3;
501                    continue;
502                } else if c1 == b'\n' {
503                    i += 2;
504                    continue;
505                }
506
507                // Hex encoded character
508                if let (Some(h1), Some(h2)) = (hex_value(c1), hex_value(c2)) {
509                    result.push((h1 << 4) | h2);
510                    i += 3;
511                    continue;
512                }
513            }
514
515            // If we get here, it's a malformed sequence - pass through the '='
516            result.push(b'=');
517            i += 1;
518        } else {
519            result.push(data[i]);
520            i += 1;
521        }
522    }
523
524    Ok(result)
525}
526
527/// Convert a hex digit to its numeric value
528fn hex_value(c: u8) -> Option<u8> {
529    match c {
530        b'0'..=b'9' => Some(c - b'0'),
531        b'A'..=b'F' => Some(c - b'A' + 10),
532        b'a'..=b'f' => Some(c - b'a' + 10),
533        _ => None,
534    }
535}
536
/// Encode `data` with the standard Base64 alphabet.
///
/// The output is `=`-padded to a multiple of four characters and contains no
/// line breaks.
pub fn encode_base64(data: &[u8]) -> String {
    const ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    let mut encoded = String::with_capacity(data.len().div_ceil(3) * 4);

    for chunk in data.chunks(3) {
        // Pack up to three bytes into one 24-bit group; missing bytes count as zero.
        let second = chunk.get(1).copied().unwrap_or(0);
        let third = chunk.get(2).copied().unwrap_or(0);
        let group = (u32::from(chunk[0]) << 16) | (u32::from(second) << 8) | u32::from(third);

        let symbol = |shift: u32| ALPHABET[((group >> shift) & 0x3f) as usize] as char;

        encoded.push(symbol(18));
        encoded.push(symbol(12));
        encoded.push(if chunk.len() > 1 { symbol(6) } else { '=' });
        encoded.push(if chunk.len() > 2 { symbol(0) } else { '=' });
    }

    encoded
}
578
/// Encode data as quoted-printable
///
/// Bytes in the printable ASCII range `33..=126`, except `=`, are emitted
/// literally. Every other byte, including space, tab, CR and LF, is written as
/// `=XX` with uppercase hex digits.
///
/// Soft line breaks (`=` followed by CRLF) are inserted so that no output line
/// exceeds 76 characters *including* the trailing `=`, as RFC 2045 section 6.7
/// requires. An `=XX` escape is never split across lines.
pub fn encode_quoted_printable(data: &[u8]) -> String {
    /// Maximum encoded line length, not counting the CRLF.
    const MAX_LINE_LEN: usize = 76;
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(data.len());
    let mut line_len = 0;

    for &byte in data {
        let is_literal = (33..=126).contains(&byte) && byte != b'=';
        let width = if is_literal { 1 } else { 3 };

        // Keep one column free for the '=' that ends a soft-broken line.
        if line_len + width > MAX_LINE_LEN - 1 {
            encoded.push_str("=\r\n");
            line_len = 0;
        }

        if is_literal {
            encoded.push(char::from(byte));
        } else {
            encoded.push('=');
            encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
        }
        line_len += width;
    }

    encoded
}