Skip to main content

br_email/
analyze.rs

1use br_crypto::encoding::code_to_utf8;
2use chrono::{DateTime, Local, TimeZone};
3use json::{object, JsonValue};
4use regex::Regex;
5use std::collections::HashMap;
6use std::ffi::OsStr;
7use std::io::{Error, Write};
8use std::{env, fs, io};
9
10const MAX_DEPTH: usize = 10;
11
/// Parsed representation of a single e-mail message.
#[derive(Debug)]
pub struct AnalyzeEmails {
    /// When set, certain parse failures dump the message to an `.eml` file in the
    /// current working directory.
    pub debug: bool,
    /// Headers that are not handled explicitly, keyed by lower-cased header name.
    pub header: HashMap<String, String>,
    /// Value of the `MIME-Version` header.
    pub mime_version: String,
    /// Boundary string taken from the top-level multipart `Content-Type` header.
    boundary: String,
    /// Hex MD5 digest of the raw input.
    pub md5: String,
    /// Length of the raw input in bytes.
    pub size: usize,
    /// Unix timestamp parsed from the `Date` header.
    pub timestamp: i64,
    /// Local time, formatted as `%Y-%m-%d %H:%M:%S`.
    pub datetime: String,
    /// Subject.
    pub subject: String,
    /// Sender (`From` header).
    pub from: HashMap<String, String>,
    /// Recipients (`To` header).
    pub to: HashMap<String, String>,
    /// Carbon-copy recipients (`Cc` header).
    pub cc: HashMap<String, String>,
    /// Address the recipient should use when replying (`Reply-To` header).
    pub replyto: HashMap<String, String>,
    /// Content type.
    pub content_type: String,
    /// Transfer encoding.
    pub content_transfer_encoding: ContentTransferEncoding,
    /// Actual sender (`Sender` header).
    pub sender: String,
    /// Decoded plain-text body.
    pub body_text: String,
    /// Decoded HTML body.
    pub body_html: String,
    /// Extracted attachments, keyed by the MD5 of their decoded content.
    pub files: JsonValue,
    /// Character set taken from the top-level `Content-Type` header.
    pub charset: String,
    /// Top-level attachment file name (from Content-Type `name=` or Content-Disposition `filename=`).
    content_filename: String,
    /// Recursion depth (guards against stack overflow from maliciously nested messages).
    depth: usize,
}
50
51impl AnalyzeEmails {
52    pub fn new(mut data: Vec<u8>, debug: bool) -> io::Result<AnalyzeEmails> {
53        let md5 = br_crypto::md5::encrypt_hex(&data.clone()).to_string();
54        let size = data.len();
55        let data_string = String::from_utf8_lossy(&data).to_string();
56        if data_string.contains("\n\n") {
57            let updated_string = data_string.replace("\n", "\r\n");
58            data = updated_string.as_bytes().to_vec();
59        }
60
61        let subsequence = "\r\n\r\n".as_bytes();
62
63        let (header, body) = match data
64            .windows(subsequence.len())
65            .position(|window| window == subsequence)
66        {
67            None => {
68                if debug {
69                    fs::write(
70                        format!(
71                            "{}/xygs-{}.eml",
72                            env::current_dir()
73                                .unwrap_or_default()
74                                .to_str()
75                                .unwrap_or("."),
76                            md5
77                        ),
78                        data.clone(),
79                    )?;
80                }
81                return Err(Error::other(format!("协议格式错误: {md5}")));
82            }
83            Some(e) => (data[..e].to_vec(), data[e + 4..].to_vec()),
84        };
85        let mut that = Self {
86            debug,
87            header: Default::default(),
88            mime_version: "".to_string(),
89            boundary: "".to_string(),
90            md5,
91            size,
92            timestamp: 0,
93            subject: "".to_string(),
94            from: Default::default(),
95            to: Default::default(),
96            cc: Default::default(),
97            replyto: Default::default(),
98            datetime: "".to_string(),
99            content_type: "".to_string(),
100            content_transfer_encoding: ContentTransferEncoding::Bit7,
101            sender: "".to_string(),
102            body_text: "".to_string(),
103            body_html: "".to_string(),
104            files: object! {},
105            charset: "utf-8".to_string(),
106            content_filename: "".to_string(),
107            depth: 0,
108        };
109        that.header(header)?;
110        if that.content_type.is_empty() {
111            that.content_type = "text/plain".to_string();
112        }
113        that.body(body, data_string)?;
114        Ok(that)
115    }
116
117    fn header(&mut self, data: Vec<u8>) -> io::Result<()> {
118        let data = String::from_utf8_lossy(&data).to_string();
119        let data = data.replace("\r\n\t", "").replace("\r\n ", " ");
120        for item in data.lines() {
121            let (key, value) = match item.find(": ") {
122                Some(e) => (item[..e].to_string(), item[e + 2..].to_string()),
123                None => match item.find(":") {
124                    Some(e) => (item[..e].to_string(), item[e + 1..].to_string()),
125                    None => continue,
126                },
127            };
128            let name = key.to_lowercase();
129            if value.is_empty() {
130                continue;
131            }
132            match key.to_lowercase().as_str() {
133                "mime-version" => self.mime_version = value.to_string(),
134                "from" => {
135                    self.from = self.from(&value);
136                }
137                "sender" => {
138                    self.sender = value.to_string();
139                }
140                "to" => {
141                    self.to = self.email_encoded(&value);
142                }
143                "cc" => {
144                    self.cc = self.email_encoded(&value);
145                }
146                "reply-to" => {
147                    self.replyto = self.email_encoded(&value);
148                }
149                "subject" => {
150                    self.subject = self.subject(value.to_string());
151                }
152                "content-type" => {
153                    let types = value.split(";").collect::<Vec<&str>>();
154                    self.content_type = types[0].trim().to_lowercase().to_string();
155                    match self.content_type.as_str() {
156                        "multipart/mixed"
157                        | "multipart/alternative"
158                        | "multipart/related"
159                        | "multipart/report"
160                        | "multipart/signed"
161                        | "multipart/encrypted"
162                        | "multipart/digest" => match types[1].find("boundary=") {
163                            None => {}
164                            Some(e) => {
165                                let boundary = &types[1][e..];
166                                self.boundary = boundary
167                                    .trim()
168                                    .trim_start_matches("boundary=")
169                                    .trim_start_matches("\"")
170                                    .trim_end_matches("\"")
171                                    .to_string();
172                            }
173                        },
174                        _ => {}
175                    }
176                    if types.len() > 1 {
177                        for item in types.iter() {
178                            let trimmed = item.trim();
179                            if trimmed.contains("charset=") {
180                                self.charset = trimmed
181                                    .split("charset=")
182                                    .last()
183                                    .unwrap_or("")
184                                    .trim_start_matches('"')
185                                    .trim_end_matches('"')
186                                    .to_string();
187                            }
188                            if trimmed.starts_with("name=") {
189                                self.content_filename =
190                                    self.encoded(trimmed.trim_start_matches("name="));
191                            }
192                        }
193                    }
194                }
195                "content-transfer-encoding" => {
196                    self.content_transfer_encoding = ContentTransferEncoding::from(&value);
197                }
198                "date" => self.datetime(&value)?,
199                "content-disposition" => {
200                    if self.content_filename.is_empty() && value.contains("filename=") {
201                        self.content_filename = value.split("filename=").collect::<Vec<&str>>()[1]
202                            .trim_start_matches('"')
203                            .trim_end_matches('"')
204                            .to_string();
205                    }
206                    if self.content_filename.is_empty() && value.contains("filename*=utf-8''") {
207                        self.content_filename =
208                            value.split("filename*=utf-8''").collect::<Vec<&str>>()[1]
209                                .trim_start_matches('"')
210                                .trim_end_matches('"')
211                                .to_string();
212                        self.content_filename =
213                            br_crypto::encoding::urlencoding_decode(self.content_filename.as_str());
214                    }
215                }
216                _ => {
217                    self.header
218                        .insert(name.trim().to_string(), value.to_string());
219                }
220            }
221        }
222        Ok(())
223    }
224    fn body(&mut self, data: Vec<u8>, old_data: String) -> io::Result<()> {
225        match self.content_type.to_lowercase().as_str() {
226            "text/html" => {
227                let data = self.content_transfer_encoding.decode(data)?;
228                let res = code_to_utf8(self.charset.as_str(), data.clone());
229                self.body_html = res;
230            }
231            "text/plain" => {
232                let data = self.content_transfer_encoding.decode(data)?;
233                let res = code_to_utf8(self.charset.as_str(), data.clone());
234                self.body_text = res;
235            }
236            "multipart/mixed"
237            | "multipart/alternative"
238            | "multipart/related"
239            | "multipart/report"
240            | "multipart/signed"
241            | "multipart/encrypted"
242            | "multipart/digest" => {
243                let data = self.content_transfer_encoding.decode(data.clone())?;
244                let mut parts = code_to_utf8(self.charset.as_str(), data.clone());
245                let mut parts_list = vec![];
246                let mut text = String::new();
247
248                parts = match parts.find(self.boundary.as_str()) {
249                    None => parts,
250                    Some(e) => parts[e..].to_string(),
251                };
252                for item in parts.lines() {
253                    if item.contains(self.boundary.as_str()) && text.is_empty() {
254                        continue;
255                    }
256                    if item.contains(self.boundary.as_str()) && !text.is_empty() {
257                        parts_list.push(text.clone());
258                        text = String::new();
259                        continue;
260                    }
261                    text.push_str(item);
262                    text.push_str("\r\n");
263                }
264                for part in parts_list {
265                    if part.trim().is_empty() {
266                        continue;
267                    }
268                    self.parts(part.to_string(), old_data.clone())?;
269                }
270            }
271            _ => {
272                if !self.content_filename.is_empty() {
273                    let data_str = String::from_utf8_lossy(&data).to_string();
274                    let encoding = self.content_transfer_encoding.clone();
275                    let filename = self.content_filename.clone();
276                    let ct = self.content_type.clone();
277                    self.set_files(encoding, &data_str, &filename, ct)?;
278                } else {
279                    log::warn!("未知body类型: {}, 已跳过", self.content_type);
280                }
281            }
282        }
283        Ok(())
284    }
285    /// 部分内容处理
286    fn parts(&mut self, data: String, old_data: String) -> io::Result<()> {
287        if self.depth >= MAX_DEPTH {
288            log::warn!("递归深度超过限制 {}, 已跳过", MAX_DEPTH);
289            return Ok(());
290        }
291        self.depth += 1;
292        let (header_str, body) = match data.find("\r\n\r\n") {
293            None => {
294                if self.debug {
295                    fs::write(
296                        format!(
297                            "{}/head-{}.eml",
298                            env::current_dir()
299                                .unwrap_or_default()
300                                .to_str()
301                                .unwrap_or("."),
302                            self.md5
303                        ),
304                        old_data.clone(),
305                    )?;
306                }
307                return Err(Error::other("解析附件头失败"));
308            }
309            Some(e) => (
310                data[..e].replace("\r\n\t", " ").replace("\r\n ", " "),
311                &data[e + 4..],
312            ),
313        };
314
315        let mut filename = "".to_string();
316        let mut content_type = String::new();
317        let mut boundary = String::new();
318        let mut content_transfer_encoding = ContentTransferEncoding::None;
319        for item in header_str.lines() {
320            let (key, value) = match item.find(": ") {
321                Some(e) => (&item[..e], &item[e + 2..]),
322                None => match item.find(":") {
323                    Some(e) => (&item[..e], &item[e + 1..]),
324                    None => continue,
325                },
326            };
327
328            let name = key.to_lowercase();
329
330            match name.trim() {
331                "content-transfer-encoding" => {
332                    content_transfer_encoding = ContentTransferEncoding::from(value)
333                }
334                "content-type" => {
335                    let types = value.trim().split(";").collect::<Vec<&str>>();
336                    content_type = types[0].trim().to_string();
337                    let name = types
338                        .iter()
339                        .filter(|&x| x.trim().starts_with("name="))
340                        .map(|&x| x.trim().to_string())
341                        .collect::<Vec<String>>();
342                    if !name.is_empty() {
343                        let name = name[0].trim_start_matches("name=");
344                        filename = self.encoded(name);
345                    }
346                    match value.find("boundary=") {
347                        None => {}
348                        Some(i) => {
349                            let mut b = &value[i + 9..];
350                            b = match b.find(";") {
351                                None => b,
352                                Some(i) => &b[..i],
353                            };
354                            boundary = b
355                                .trim_start_matches("\"")
356                                .trim_end_matches("\"")
357                                .to_string();
358                        }
359                    }
360                }
361                "content-id"
362                | "content-length"
363                | "mime-version"
364                | "content-description"
365                | "date"
366                | "x-attachment-id"
367                | "x-attachment-content-disposition" => {}
368                "content-disposition" => {
369                    if filename.is_empty() && value.contains("filename=") {
370                        filename = value.split("filename=").collect::<Vec<&str>>()[1]
371                            .trim_start_matches("\"")
372                            .trim_end_matches("\"")
373                            .to_string();
374                    }
375                    if filename.is_empty() && value.contains("filename*=utf-8''") {
376                        filename = value.split("filename*=utf-8''").collect::<Vec<&str>>()[1]
377                            .trim_start_matches("\"")
378                            .trim_end_matches("\"")
379                            .to_string();
380                        filename = br_crypto::encoding::urlencoding_decode(filename.as_str());
381                    }
382                }
383                _ => {
384                    log::debug!("parts 忽略未知 header: {name} [{item}]");
385                }
386            }
387        }
388
389        match content_type.as_str() {
390            "text/plain" => {
391                if filename.is_empty() {
392                    let res = content_transfer_encoding.decode(body.as_bytes().to_vec())?;
393                    let text = code_to_utf8(self.charset.as_str(), res.clone());
394                    self.body_text = text;
395                } else {
396                    self.set_files(
397                        content_transfer_encoding,
398                        body,
399                        filename.as_str(),
400                        "".to_string(),
401                    )?;
402                }
403            }
404            "text/html" | "text/x-amp-html" => {
405                if filename.is_empty() {
406                    let res = content_transfer_encoding.decode(body.as_bytes().to_vec())?;
407                    self.body_html = code_to_utf8(self.charset.as_str(), res.clone());
408                } else {
409                    self.set_files(
410                        content_transfer_encoding,
411                        body,
412                        filename.as_str(),
413                        "".to_string(),
414                    )?;
415                }
416            }
417            "multipart/mixed" | "multipart/alternative" | "multipart/related" | "multipart/report" | "multipart/signed" | "multipart/encrypted" | "multipart/digest" => {
418                let data = self
419                    .content_transfer_encoding
420                    .decode(body.as_bytes().to_vec())?;
421                let mut parts = code_to_utf8(self.charset.as_str(), data.clone());
422
423                parts = match parts.find(boundary.as_str()) {
424                    None => parts,
425                    Some(e) => parts[e..].to_string(),
426                };
427
428                let mut parts_list = vec![];
429                let mut text = String::new();
430                for item in parts.lines() {
431                    if item.contains(&boundary) && text.is_empty() {
432                        continue;
433                    }
434                    if item.contains(&boundary) && !text.is_empty() {
435                        parts_list.push(text);
436                        text = String::new();
437                        continue;
438                    }
439                    text.push_str(item);
440                    text.push_str("\r\n");
441                }
442                for part in parts_list {
443                    if part.trim().is_empty() {
444                        continue;
445                    }
446                    self.parts(part.to_string(), old_data.clone())?;
447                }
448            }
449            "text/calendar" | "message/delivery-status" | "message/disposition-notification" => {}
450            "message/rfc822" => {
451                let data = content_transfer_encoding.decode(body.as_bytes().to_vec())?;
452                let text = code_to_utf8(self.charset.as_str(), data);
453                match AnalyzeEmails::new(text.into_bytes(), self.debug) {
454                    Ok(nested) => {
455                        if self.body_text.is_empty() {
456                            self.body_text = nested.body_text;
457                        }
458                        if self.body_html.is_empty() {
459                            self.body_html = nested.body_html;
460                        }
461                        for (k, v) in nested.files.entries() {
462                            self.files[k] = v.clone();
463                        }
464                    }
465                    Err(e) => {
466                        log::warn!("解析嵌套邮件 message/rfc822 失败: {}", e);
467                    }
468                }
469            }
470            "application/octet-stream"
471            | "application/zip"
472            | "application/pdf"
473            | "image/jpeg"
474            | "image/png"
475            | "image/gif"
476            | "application/ics"
477            | "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
478            | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
479            | "application/vnd.ms-excel" => {
480                if !filename.is_empty() {
481                    self.set_files(
482                        content_transfer_encoding,
483                        body,
484                        filename.as_str(),
485                        content_type.to_string(),
486                    )?;
487                }
488            }
489            _ => {
490                if !filename.is_empty() {
491                    self.set_files(
492                        content_transfer_encoding,
493                        body,
494                        filename.as_str(),
495                        content_type.to_string(),
496                    )?;
497                } else {
498                    if self.debug {
499                        fs::write(
500                            format!(
501                                "{}/content_type-{}.eml",
502                                env::current_dir()
503                                    .unwrap_or_default()
504                                    .to_str()
505                                    .unwrap_or("."),
506                                self.md5
507                            ),
508                            old_data.clone(),
509                        )?;
510                    }
511                    log::warn!(
512                        "未知 parts content_type 类型: {}, 无文件名已跳过",
513                        content_type
514                    );
515                }
516            }
517        }
518        Ok(())
519    }
520    pub fn from(&mut self, value: &str) -> HashMap<String, String> {
521        let mut r = value
522            .split("<")
523            .filter(|x| !x.trim().is_empty())
524            .map(|x| x.trim())
525            .collect::<Vec<&str>>();
526        if r[0].starts_with("\"") && r[0].ends_with("\"") {
527            r[0] = r[0].trim_start_matches("\"").trim_end_matches("\"").trim();
528        }
529        let mut emails = HashMap::new();
530        if r.len() == 1 {
531            let name = r[0].trim_end_matches(">").to_string();
532            emails.insert(name.clone(), name);
533        } else {
534            let name = self.encoded(r[0].trim());
535            let email = r[1].trim_end_matches(">").to_string();
536            emails.insert(email, name);
537        }
538        emails
539    }
540    fn subject(&mut self, value: String) -> String {
541        let value = value.replace("?==?", "?=\r\n\t=?");
542        if !value.contains("=?") && !value.contains("?=") {
543            return value.to_string();
544        }
545        let list = value.split("\r\n\t").collect::<Vec<&str>>();
546        let mut txt = vec![];
547        for item in list {
548            txt.push(self.encoded(item));
549        }
550        txt.join("")
551    }
552
553    fn encoded(&mut self, value: &str) -> String {
554        let t = value.trim_start_matches("\"").trim_end_matches("\"");
555        if t.contains("=?") && t.contains("?=") {
556            let l = t.split(" ").collect::<Vec<&str>>();
557            let mut txt = vec![];
558            for item in l {
559                txt.push(self.encoded_line(item));
560            }
561            txt.join("")
562        } else {
563            t.to_string()
564        }
565    }
566    /// 段落解码
567    fn encoded_line(&mut self, value: &str) -> String {
568        let line = value.split("?").collect::<Vec<&str>>();
569        if line.len() < 4 {
570            return value.to_string();
571        }
572        let charset = line[1].to_lowercase();
573        let code = line[2].to_uppercase();
574        let data = line[3];
575
576        let strs = match code.as_str() {
577            "B" => br_crypto::base64::decode_u8(data),
578            "Q" => br_crypto::qp::decode(data).unwrap_or(vec![]),
579            _ => data.as_bytes().to_vec(),
580        };
581        let text = code_to_utf8(&charset, strs.clone());
582        text.chars().filter(|&x| x != '\u{200b}').collect()
583    }
584
585    /// 时间处理
586    fn datetime(&mut self, value: &str) -> io::Result<()> {
587        let re =
588            Regex::new(r"\s*\(.*\)$").map_err(|e| Error::other(format!("正则表达式错误: {e}")))?;
589        let datetime = re.replace(value, "").to_string();
590        let datetime = datetime.replace("GMT", "+0000").to_string();
591        let datetime = match datetime.find(",") {
592            None => datetime,
593            Some(i) => datetime[i + 1..].trim().to_string(),
594        };
595        // 归一化月份缩写大小写 ("NOV" -> "Nov", "nov" -> "Nov")
596        let datetime = datetime
597            .split_whitespace()
598            .map(|word| {
599                if word.len() == 3 && word.chars().all(|c| c.is_ascii_alphabetic()) {
600                    let mut chars = word.chars();
601                    match chars.next() {
602                        Some(f) => {
603                            format!("{}{}", f.to_uppercase(), chars.as_str().to_lowercase())
604                        }
605                        None => word.to_string(),
606                    }
607                } else {
608                    word.to_string()
609                }
610            })
611            .collect::<Vec<_>>()
612            .join(" ");
613        // 尝试多种日期格式:标准 RFC 2822 (dd Mon YYYY) 和美式 (Mon dd YYYY)
614        let formats = ["%d %b %Y %H:%M:%S %z", "%b %d %Y %H:%M:%S %z"];
615        let datetime = formats
616            .iter()
617            .find_map(|fmt| DateTime::parse_from_str(datetime.as_str(), fmt).ok())
618            .ok_or_else(|| {
619                Error::other(format!("时间解析失败: 所有格式均不匹配 [{datetime:?}]"))
620            })?;
621
622        self.timestamp = datetime.timestamp();
623        self.datetime = Local
624            .timestamp_opt(self.timestamp, 0)
625            .single()
626            .map(|dt| {
627                dt.with_timezone(&Local)
628                    .format("%Y-%m-%d %H:%M:%S")
629                    .to_string()
630            })
631            .unwrap_or_default();
632        Ok(())
633    }
634    pub fn email_encoded(&mut self, value: &str) -> HashMap<String, String> {
635        let list = value.split(",").map(|x| x.trim()).collect::<Vec<&str>>();
636        let mut emails = HashMap::new();
637        for item in list {
638            let mut r = item.split(" <").collect::<Vec<&str>>();
639            if r[0].starts_with("\"") && r[0].ends_with("\"") {
640                r[0] = r[0].trim_start_matches("\"").trim_end_matches("\"");
641            }
642            if r.len() == 1 {
643                let name = r[0]
644                    .trim_start_matches("<")
645                    .trim_end_matches(">")
646                    .to_string();
647                emails.insert(name.clone(), name);
648            } else {
649                let name = self.encoded(r[0].trim());
650                let email = r[1].trim_end_matches(">").to_string();
651                emails.insert(email, name);
652            }
653        }
654        emails
655    }
656    fn set_files(
657        &mut self,
658        mut content_transfer_encoding: ContentTransferEncoding,
659        body: &str,
660        filename: &str,
661        mut content_type: String,
662    ) -> io::Result<()> {
663        // 文件名安全处理:移除路径穿越字符,只保留文件名部分
664        let filename = filename
665            .replace("\\", "/")
666            .split('/')
667            .next_back()
668            .unwrap_or(filename)
669            .replace("..", "")
670            .trim_start_matches('.')
671            .to_string();
672        let filename = if filename.is_empty() { "unnamed".to_string() } else { filename };
673        let filename = filename.as_str();
674        let mut data_str = String::new();
675        if let ContentTransferEncoding::Base64 = content_transfer_encoding {
676            let mut text = "".to_string();
677            for line in body.lines() {
678                text += line;
679            }
680            data_str = text;
681        }
682
683        let body = content_transfer_encoding.decode(data_str.as_bytes().to_vec())?;
684        let md5 = br_crypto::md5::encrypt_hex(&body.clone());
685        let size = body.len();
686        let mut temp_dir = env::temp_dir();
687        temp_dir.push(filename);
688        let path_temp_dir = temp_dir.clone();
689
690        let mut temp_file = match fs::File::create(temp_dir.clone()) {
691            Ok(e) => e,
692            Err(e) => {
693                return Err(Error::other(format!(
694                    "打开(创建)临时文件: {e} [{filename}]"
695                )))
696            }
697        };
698
699        if temp_file.write(body.as_slice()).is_ok() {
700            if content_type.is_empty() {
701                content_type = path_temp_dir
702                    .extension()
703                    .unwrap_or(OsStr::new("unknown"))
704                    .to_str()
705                    .unwrap_or("unknown")
706                    .to_string();
707            }
708
709            self.files[md5.as_str()] = object! {
710                name:filename,
711                md5:md5.clone(),
712                size:size,
713                "content-type":content_type.clone(),
714                file:temp_dir.to_str()
715            };
716        };
717        Ok(())
718    }
719}
720
721impl Default for AnalyzeEmails {
722    fn default() -> Self {
723        Self {
724            debug: false,
725            header: Default::default(),
726            mime_version: "".to_string(),
727            boundary: "".to_string(),
728            md5: "".to_string(),
729            size: 0,
730            timestamp: 0,
731            datetime: "".to_string(),
732            subject: "".to_string(),
733            from: Default::default(),
734            to: Default::default(),
735            cc: Default::default(),
736            replyto: Default::default(),
737            content_type: "".to_string(),
738            content_transfer_encoding: ContentTransferEncoding::None,
739            sender: "".to_string(),
740            body_text: "".to_string(),
741            body_html: "".to_string(),
742            files: JsonValue::Null,
743            charset: "".to_string(),
744            content_filename: "".to_string(),
745            depth: 0,
746        }
747    }
748}
749
/// Transfer encoding.
/// Guidelines for choosing a Content-Transfer-Encoding:
///
/// Plain text: content that is plain text with ASCII characters only normally uses 7bit.
/// Non-ASCII text: content with non-ASCII characters can use quoted-printable or 8bit, depending on the content and compatibility requirements.
/// Binary data: images, video, audio and other binary data normally use base64.
#[derive(Debug, Clone)]
pub enum ContentTransferEncoding {
    /// Mainly used for text. Most of the text stays readable, while non-ASCII and special characters (such as =, ?, &) are encoded for compatibility.
    /// Suited to mail content with many special characters or non-ASCII text.
    QuotedPrintable,
    /// Encodes binary data as an ASCII string over a 64-character alphabet (A-Z, a-z, 0-9, +, /). Every 3 bytes of binary data become 4 characters, which makes the data safe to carry in mail.
    /// Commonly used for attachments, images, audio, video and other binary data.
    Base64,
    /// The content is binary data that must not be escaped or encoded and has to be transferred in its raw binary form. Typically used for images, audio and other binary files.
    /// Requires mail transfer agents that can handle every possible byte value with almost no conversion, so not all systems support it.
    Binary,
    /// The content contains 8-bit characters, so it may hold non-ASCII characters (such as accented letters). Such mail can carry more characters, but not every mail transfer agent supports 8bit transport.
    /// Suited to non-ASCII text, provided the transport path supports 8bit data.
    Bit8,
    /// The content is ASCII text with 7-bit characters only (the standard ASCII set); the top bit of every character is 0. This is the most common encoding because it works with the vast majority of mail systems.
    /// Suited to plain-text mail without special characters or binary data.
    Bit7,
    /// No recognised transfer encoding was declared.
    None,
}
775
776impl ContentTransferEncoding {
777    fn from(value: &str) -> Self {
778        match value.to_lowercase().as_str() {
779            "7bit" => Self::Bit7,
780            "8bit" => Self::Bit8,
781            "binary" => Self::Binary,
782            "base64" => Self::Base64,
783            "quoted-printable" => Self::QuotedPrintable,
784            _ => Self::None,
785        }
786    }
787    fn decode(&mut self, mut data: Vec<u8>) -> io::Result<Vec<u8>> {
788        let res = match self {
789            ContentTransferEncoding::QuotedPrintable => br_crypto::qp::decode(data)?,
790            ContentTransferEncoding::Base64 => {
791                let str = String::from_utf8_lossy(&data).to_string();
792                let mut text = "".to_string();
793                for line in str.lines() {
794                    text += line;
795                }
796                data = text.as_bytes().to_vec();
797                br_crypto::base64::decode_u8(data)
798            }
799            ContentTransferEncoding::Binary => data,
800            ContentTransferEncoding::Bit8 => data,
801            ContentTransferEncoding::Bit7 => data,
802            ContentTransferEncoding::None => data,
803        };
804        Ok(res)
805    }
806}
807
808#[cfg(test)]
809#[allow(clippy::field_reassign_with_default)]
810mod tests {
811    use super::*;
812    use std::time::{SystemTime, UNIX_EPOCH};
813    use std::{env, fs};
814
815    fn unique_token(prefix: &str) -> String {
816        let nanos = SystemTime::now()
817            .duration_since(UNIX_EPOCH)
818            .unwrap()
819            .as_nanos();
820        format!("{prefix}-{nanos}-{}", std::process::id())
821    }
822
823    fn multipart_email(content_type: &str, boundary: &str, part: &str) -> Vec<u8> {
824        format!(
825            "From: sender@example.com\r\n\
826To: receiver@example.com\r\n\
827Subject: Multipart Test\r\n\
828Content-Type: {content_type};boundary=\"{boundary}\";charset=\"utf-8\"\r\n\
829Content-Transfer-Encoding: 7bit\r\n\
830Date: Mon, 01 Jan 2024 12:00:00 GMT (UTC)\r\n\
831\r\n\
832--{boundary}\r\n\
833{part}\r\n\
834--{boundary}--\r\n"
835        )
836        .into_bytes()
837    }
838
839    #[test]
840    fn test_content_transfer_encoding_from() {
841        assert!(matches!(
842            ContentTransferEncoding::from("7bit"),
843            ContentTransferEncoding::Bit7
844        ));
845        assert!(matches!(
846            ContentTransferEncoding::from("8bit"),
847            ContentTransferEncoding::Bit8
848        ));
849        assert!(matches!(
850            ContentTransferEncoding::from("base64"),
851            ContentTransferEncoding::Base64
852        ));
853        assert!(matches!(
854            ContentTransferEncoding::from("BASE64"),
855            ContentTransferEncoding::Base64
856        ));
857        assert!(matches!(
858            ContentTransferEncoding::from("quoted-printable"),
859            ContentTransferEncoding::QuotedPrintable
860        ));
861        assert!(matches!(
862            ContentTransferEncoding::from("binary"),
863            ContentTransferEncoding::Binary
864        ));
865        assert!(matches!(
866            ContentTransferEncoding::from("unknown"),
867            ContentTransferEncoding::None
868        ));
869    }
870
871    #[test]
872    fn test_content_transfer_encoding_decode_7bit() {
873        let mut enc = ContentTransferEncoding::Bit7;
874        let data = b"Hello World".to_vec();
875        let result = enc.decode(data.clone()).unwrap();
876        assert_eq!(result, data);
877    }
878
879    #[test]
880    fn test_content_transfer_encoding_decode_8bit() {
881        let mut enc = ContentTransferEncoding::Bit8;
882        let data = "你好世界".as_bytes().to_vec();
883        let result = enc.decode(data.clone()).unwrap();
884        assert_eq!(result, data);
885    }
886
887    #[test]
888    fn test_content_transfer_encoding_decode_base64() {
889        let mut enc = ContentTransferEncoding::Base64;
890        let data = b"SGVsbG8gV29ybGQ=".to_vec();
891        let result = enc.decode(data).unwrap();
892        assert_eq!(result, b"Hello World");
893    }
894
895    #[test]
896    fn test_analyze_emails_default() {
897        let email = AnalyzeEmails::default();
898        assert!(!email.debug);
899        assert_eq!(email.size, 0);
900        assert_eq!(email.timestamp, 0);
901        assert!(email.subject.is_empty());
902        assert!(email.from.is_empty());
903        assert!(email.to.is_empty());
904    }
905
906    #[test]
907    fn test_analyze_simple_email() {
908        let email_data = b"From: sender@example.com\r\n\
909To: receiver@example.com\r\n\
910Subject: Test Subject\r\n\
911Content-Type: text/plain\r\n\
912Date: 01 Jan 2024 12:00:00 +0000\r\n\
913\r\n\
914Hello, this is a test email body."
915            .to_vec();
916
917        let result = AnalyzeEmails::new(email_data, false).unwrap();
918        assert_eq!(result.subject, "Test Subject");
919        assert_eq!(result.content_type, "text/plain");
920        assert!(result.from.contains_key("sender@example.com"));
921        assert!(result.to.contains_key("receiver@example.com"));
922        assert_eq!(result.body_text, "Hello, this is a test email body.");
923    }
924
925    #[test]
926    fn test_analyze_email_with_encoded_subject() {
927        let email_data = b"From: test@example.com\r\n\
928To: receiver@example.com\r\n\
929Subject: =?UTF-8?B?5rWL6K+V5Li76aKY?=\r\n\
930Content-Type: text/plain\r\n\
931Date: 01 Jan 2024 12:00:00 +0000\r\n\
932\r\n\
933Test body"
934            .to_vec();
935
936        let result = AnalyzeEmails::new(email_data, false).unwrap();
937        assert!(result.subject.contains("测试主题"));
938    }
939
940    #[test]
941    fn test_analyze_email_html() {
942        let email_data = b"From: sender@example.com\r\n\
943To: receiver@example.com\r\n\
944Subject: HTML Test\r\n\
945Content-Type: text/html\r\n\
946Date: 01 Jan 2024 12:00:00 +0000\r\n\
947\r\n\
948<html><body><h1>Hello</h1></body></html>"
949            .to_vec();
950
951        let result = AnalyzeEmails::new(email_data, false).unwrap();
952        assert_eq!(result.content_type, "text/html");
953        assert!(result.body_html.contains("<h1>Hello</h1>"));
954    }
955
956    #[test]
957    fn test_analyze_email_invalid_format() {
958        let invalid_data = b"This is not a valid email".to_vec();
959        let result = AnalyzeEmails::new(invalid_data, false);
960        assert!(result.is_err());
961    }
962
963    #[test]
964    fn test_from_parsing() {
965        let mut email = AnalyzeEmails::default();
966
967        let result = email.from(r#""John Doe" <john@example.com>"#);
968        assert_eq!(result.get("john@example.com").unwrap(), "John Doe");
969
970        let result = email.from(r#"<simple@example.com>"#);
971        assert_eq!(
972            result.get("simple@example.com").unwrap(),
973            "simple@example.com"
974        );
975    }
976
977    #[test]
978    fn test_email_encoded_parsing() {
979        let mut email = AnalyzeEmails::default();
980
981        let result = email.email_encoded(r#"<a@test.com>, <b@test.com>"#);
982        assert!(result.contains_key("a@test.com"));
983        assert!(result.contains_key("b@test.com"));
984    }
985
986    #[test]
987    fn test_analyze_email_with_cc() {
988        let email_data = b"From: sender@example.com\r\n\
989To: receiver@example.com\r\n\
990Cc: cc1@example.com, cc2@example.com\r\n\
991Subject: CC Test\r\n\
992Content-Type: text/plain\r\n\
993Date: 01 Jan 2024 12:00:00 +0000\r\n\
994\r\n\
995Test body"
996            .to_vec();
997
998        let result = AnalyzeEmails::new(email_data, false).unwrap();
999        assert!(result.cc.contains_key("cc1@example.com"));
1000        assert!(result.cc.contains_key("cc2@example.com"));
1001    }
1002
1003    #[test]
1004    fn test_content_transfer_encoding_decode_binary() {
1005        let mut enc = ContentTransferEncoding::Binary;
1006        let data = vec![0x00, 0x01, 0x02, 0xFF, 0xFE];
1007        let result = enc.decode(data.clone()).unwrap();
1008        assert_eq!(result, data);
1009    }
1010
1011    #[test]
1012    fn test_content_transfer_encoding_decode_none() {
1013        let mut enc = ContentTransferEncoding::None;
1014        let data = b"raw data".to_vec();
1015        let result = enc.decode(data.clone()).unwrap();
1016        assert_eq!(result, data);
1017    }
1018
1019    #[test]
1020    fn test_analyze_email_with_reply_to() {
1021        let email_data = b"From: sender@example.com\r\n\
1022To: receiver@example.com\r\n\
1023Reply-To: reply@example.com\r\n\
1024Subject: Reply-To Test\r\n\
1025Content-Type: text/plain\r\n\
1026Date: 01 Jan 2024 12:00:00 +0000\r\n\
1027\r\n\
1028Test body"
1029            .to_vec();
1030
1031        let result = AnalyzeEmails::new(email_data, false).unwrap();
1032        assert!(result.replyto.contains_key("reply@example.com"));
1033    }
1034
1035    #[test]
1036    fn test_analyze_email_with_sender() {
1037        let email_data = b"From: sender@example.com\r\n\
1038Sender: actual-sender@example.com\r\n\
1039To: receiver@example.com\r\n\
1040Subject: Sender Test\r\n\
1041Content-Type: text/plain\r\n\
1042Date: 01 Jan 2024 12:00:00 +0000\r\n\
1043\r\n\
1044Test body"
1045            .to_vec();
1046
1047        let result = AnalyzeEmails::new(email_data, false).unwrap();
1048        assert_eq!(result.sender, "actual-sender@example.com");
1049    }
1050
1051    #[test]
1052    fn test_analyze_email_with_mion() {
1053        let email_data = b"From: sender@example.com\r\n\
1054To: receiver@example.com\r\n\
1055MIME-Version: 1.0\r\n\
1056Subject: MIME Test\r\n\
1057Content-Type: text/plain\r\n\
1058Date: 01 Jan 2024 12:00:00 +0000\r\n\
1059\r\n\
1060Test body"
1061            .to_vec();
1062
1063        let result = AnalyzeEmails::new(email_data, false).unwrap();
1064        assert_eq!(result.mime_version, "1.0");
1065    }
1066
1067    #[test]
1068    fn test_analyze_email_lf_only() {
1069        let email_data = b"From: sender@example.com\n\
1070To: receiver@example.com\n\
1071Subject: LF Only Test\n\
1072Content-Type: text/plain\n\
1073Date: 01 Jan 2024 12:00:00 +0000\n\
1074\n\
1075Test body with LF only"
1076            .to_vec();
1077
1078        let result = AnalyzeEmails::new(email_data, false).unwrap();
1079        assert_eq!(result.subject, "LF Only Test");
1080    }
1081
1082    #[test]
1083    fn test_analyze_email_with_custom_header() {
1084        let email_data = b"From: sender@example.com\r\n\
1085To: receiver@example.com\r\n\
1086X-Custom-Header: custom-value\r\n\
1087Subject: Custom Header Test\r\n\
1088Content-Type: text/plain\r\n\
1089Date: 01 Jan 2024 12:00:00 +0000\r\n\
1090\r\n\
1091Test body"
1092            .to_vec();
1093
1094        let result = AnalyzeEmails::new(email_data, false).unwrap();
1095        assert_eq!(
1096            result.header.get("x-custom-header").unwrap(),
1097            "custom-value"
1098        );
1099    }
1100
1101    #[test]
1102    fn test_analyze_email_base64_body() {
1103        let email_data = b"From: sender@example.com\r\n\
1104To: receiver@example.com\r\n\
1105Subject: Base64 Test\r\n\
1106Content-Type: text/plain\r\n\
1107Content-Transfer-Encoding: base64\r\n\
1108Date: 01 Jan 2024 12:00:00 +0000\r\n\
1109\r\n\
1110SGVsbG8gV29ybGQ="
1111            .to_vec();
1112
1113        let result = AnalyzeEmails::new(email_data, false).unwrap();
1114        assert_eq!(result.body_text, "Hello World");
1115    }
1116
1117    #[test]
1118    fn test_from_parsing_simple_email() {
1119        let mut email = AnalyzeEmails::default();
1120        let result = email.from("user@example.com");
1121        assert!(result.contains_key("user@example.com"));
1122    }
1123
1124    #[test]
1125    fn test_email_encoded_with_name() {
1126        let mut email = AnalyzeEmails::default();
1127        let result = email.email_encoded(r#"John Doe <john@example.com>"#);
1128        assert_eq!(result.get("john@example.com").unwrap(), "John Doe");
1129    }
1130
1131    #[test]
1132    fn test_analyze_invalid_email_debug_writes_eml() {
1133        let invalid_data = b"invalid-email-without-separator".to_vec();
1134        let md5 = br_crypto::md5::encrypt_hex(&invalid_data);
1135        let path = env::current_dir().unwrap().join(format!("xygs-{md5}.eml"));
1136        let _ = fs::remove_file(&path);
1137
1138        let result = AnalyzeEmails::new(invalid_data.clone(), true);
1139        assert!(result.is_err());
1140        assert!(path.exists());
1141        assert_eq!(fs::read(&path).unwrap(), invalid_data);
1142
1143        let _ = fs::remove_file(path);
1144    }
1145
1146    #[test]
1147    fn test_header_colon_only_and_empty_value_skip() {
1148        let email_data = b"From: sender@example.com\r\n\
1149To: receiver@example.com\r\n\
1150Subject:Colon Header\r\n\
1151Content-Type:text/plain;charset=\"utf-8\"\r\n\
1152X-No-Space:value-without-space\r\n\
1153X-Empty:\r\n\
1154Date:Mon, 01 Jan 2024 12:00:00 GMT\r\n\
1155\r\n\
1156Body"
1157            .to_vec();
1158
1159        let result = AnalyzeEmails::new(email_data, false).unwrap();
1160        assert_eq!(result.subject, "Colon Header");
1161        assert_eq!(result.charset, "utf-8");
1162        assert_eq!(
1163            result.header.get("x-no-space").unwrap(),
1164            "value-without-space"
1165        );
1166        assert!(!result.header.contains_key("x-empty"));
1167    }
1168
1169    #[test]
1170    fn test_header_line_without_any_colon_is_skipped() {
1171        let email_data = b"From: sender@example.com\r\n\
1172To: receiver@example.com\r\n\
1173no-colon-line-here\r\n\
1174Subject: Test\r\n\
1175Content-Type: text/plain\r\n\
1176Date: 01 Jan 2024 12:00:00 +0000\r\n\
1177\r\n\
1178body"
1179            .to_vec();
1180
1181        let result = AnalyzeEmails::new(email_data, false).unwrap();
1182        assert_eq!(result.subject, "Test");
1183    }
1184
1185    #[test]
1186    fn test_multipart_body_parsing_for_all_supported_types() {
1187        let content_types = [
1188            "multipart/mixed",
1189            "multipart/alternative",
1190            "multipart/related",
1191            "multipart/report",
1192        ];
1193
1194        for content_type in content_types {
1195            let boundary = unique_token("boundary");
1196            let email_data = multipart_email(
1197                content_type,
1198                boundary.as_str(),
1199                "Content-Type: text/plain\r\nContent-Transfer-Encoding: 7bit\r\n\r\nHello multipart body",
1200            );
1201            let result = AnalyzeEmails::new(email_data, false).unwrap();
1202
1203            assert_eq!(result.content_type, content_type);
1204            assert_eq!(result.charset, "utf-8");
1205            assert!(result.body_text.contains("Hello multipart body"));
1206        }
1207    }
1208
1209    #[test]
1210    fn test_parts_header_parse_failure_debug_writes_file() {
1211        let mut email = AnalyzeEmails::default();
1212        email.debug = true;
1213        email.md5 = unique_token("head");
1214        email.files = object! {};
1215
1216        let path = env::current_dir()
1217            .unwrap()
1218            .join(format!("head-{}.eml", email.md5));
1219        let _ = fs::remove_file(&path);
1220
1221        let result = email.parts(
1222            "invalid-part-content".to_string(),
1223            "raw-email-data".to_string(),
1224        );
1225        assert!(result.is_err());
1226        assert!(path.exists());
1227        assert_eq!(fs::read_to_string(&path).unwrap(), "raw-email-data");
1228
1229        let _ = fs::remove_file(path);
1230    }
1231
1232    #[test]
1233    fn test_parts_unknown_header_is_ignored() {
1234        let mut email = AnalyzeEmails::default();
1235        email.charset = "utf-8".to_string();
1236        email.files = object! {};
1237
1238        let result = email.parts(
1239            "X-Unknown: value\r\nContent-Type: text/plain\r\n\r\nbody".to_string(),
1240            "raw".to_string(),
1241        );
1242        assert!(result.is_ok());
1243        assert_eq!(email.body_text, "body");
1244    }
1245
1246    #[test]
1247    fn test_parts_text_plain_with_name_as_attachment() {
1248        let mut email = AnalyzeEmails::default();
1249        email.charset = "utf-8".to_string();
1250        email.files = object! {};
1251
1252        let filename = format!("{}.txt", unique_token("plain-attachment"));
1253        let part = format!(
1254            "Content-Type:text/plain; name=\"{filename}\"\r\nContent-Transfer-Encoding:base64\r\n\r\nSGVsbG8gQXR0YWNobWVudA=="
1255        );
1256
1257        email.parts(part, "raw".to_string()).unwrap();
1258
1259        let body = b"Hello Attachment".to_vec();
1260        let md5 = br_crypto::md5::encrypt_hex(&body);
1261        let entry = &email.files[md5.as_str()];
1262
1263        assert_eq!(entry["name"].as_str().unwrap(), filename);
1264        assert_eq!(entry["content-type"].as_str().unwrap(), "txt");
1265        let path = entry["file"].as_str().unwrap();
1266        assert_eq!(fs::read(path).unwrap(), body);
1267
1268        let _ = fs::remove_file(path);
1269    }
1270
1271    #[test]
1272    fn test_parts_text_html_with_name_as_attachment() {
1273        let mut email = AnalyzeEmails::default();
1274        email.charset = "utf-8".to_string();
1275        email.files = object! {};
1276
1277        let filename = format!("{}.html", unique_token("html-attachment"));
1278        let part = format!(
1279            "Content-Type: text/html; name=\"{filename}\"\r\nContent-Transfer-Encoding: base64\r\n\r\nPGgxPkhlbGxvIEhUTUwgQXR0YWNobWVudDwvaDE+"
1280        );
1281
1282        email.parts(part, "raw".to_string()).unwrap();
1283
1284        let body = b"<h1>Hello HTML Attachment</h1>".to_vec();
1285        let md5 = br_crypto::md5::encrypt_hex(&body);
1286        let entry = &email.files[md5.as_str()];
1287
1288        assert_eq!(entry["name"].as_str().unwrap(), filename);
1289        assert_eq!(entry["content-type"].as_str().unwrap(), "html");
1290        let path = entry["file"].as_str().unwrap();
1291        assert_eq!(fs::read(path).unwrap(), body);
1292
1293        let _ = fs::remove_file(path);
1294    }
1295
1296    #[test]
1297    fn test_parts_content_disposition_filename() {
1298        let mut email = AnalyzeEmails::default();
1299        email.charset = "utf-8".to_string();
1300        email.files = object! {};
1301
1302        let filename = format!("{}.pdf", unique_token("filename"));
1303        let part = format!(
1304            "Content-Type: application/pdf\r\nContent-Transfer-Encoding: base64\r\nContent-Disposition: attachment; filename=\"{filename}\"\r\n\r\nSGVsbG8gUERG"
1305        );
1306
1307        email.parts(part, "raw".to_string()).unwrap();
1308
1309        let body = b"Hello PDF".to_vec();
1310        let md5 = br_crypto::md5::encrypt_hex(&body);
1311        let entry = &email.files[md5.as_str()];
1312
1313        assert_eq!(entry["name"].as_str().unwrap(), filename);
1314        assert_eq!(entry["content-type"].as_str().unwrap(), "application/pdf");
1315        let path = entry["file"].as_str().unwrap();
1316        assert_eq!(fs::read(path).unwrap(), body);
1317
1318        let _ = fs::remove_file(path);
1319    }
1320
1321    #[test]
1322    fn test_parts_content_disposition_filename_utf8_star() {
1323        let mut email = AnalyzeEmails::default();
1324        email.charset = "utf-8".to_string();
1325        email.files = object! {};
1326
1327        let part = "Content-Type: application/octet-stream\r\n\
1328Content-Transfer-Encoding: base64\r\n\
1329Content-Disposition: attachment; filename*=utf-8''hello%20world.txt\r\n\
1330\r\n\
1331SGVsbG8gVVJMIEZpbGU="
1332            .to_string();
1333
1334        email.parts(part, "raw".to_string()).unwrap();
1335
1336        let body = b"Hello URL File".to_vec();
1337        let md5 = br_crypto::md5::encrypt_hex(&body);
1338        let entry = &email.files[md5.as_str()];
1339
1340        assert_eq!(entry["name"].as_str().unwrap(), "hello world.txt");
1341        assert_eq!(
1342            entry["content-type"].as_str().unwrap(),
1343            "application/octet-stream"
1344        );
1345        let path = entry["file"].as_str().unwrap();
1346        assert_eq!(fs::read(path).unwrap(), body);
1347
1348        let _ = fs::remove_file(path);
1349    }
1350
1351    #[test]
1352    fn test_parts_nested_multipart() {
1353        let mut email = AnalyzeEmails::default();
1354        email.charset = "utf-8".to_string();
1355        email.files = object! {};
1356
1357        let boundary = unique_token("inner-boundary");
1358        let part = format!(
1359            "Content-Type: multipart/alternative; boundary=\"{boundary}\"\r\n\
1360Content-Transfer-Encoding: 7bit\r\n\
1361\r\n\
1362--{boundary}\r\n\
1363Content-Type: text/plain\r\n\
1364Content-Transfer-Encoding: 7bit\r\n\
1365\r\n\
1366Nested text body\r\n\
1367--{boundary}--\r\n"
1368        );
1369
1370        email.parts(part, "raw".to_string()).unwrap();
1371        assert!(email.body_text.contains("Nested text body"));
1372    }
1373
1374    #[test]
1375    fn test_parts_text_calendar_is_skipped() {
1376        let mut email = AnalyzeEmails::default();
1377        email.charset = "utf-8".to_string();
1378        email.files = object! {};
1379        email.body_text = "keep-me".to_string();
1380
1381        let part = "Content-Type: text/calendar\r\n\
1382Content-Transfer-Encoding: 7bit\r\n\
1383\r\n\
1384BEGIN:VCALENDAR"
1385            .to_string();
1386
1387        email.parts(part, "raw".to_string()).unwrap();
1388        assert_eq!(email.body_text, "keep-me");
1389    }
1390
1391    #[test]
1392    fn test_parts_application_content_types_are_saved() {
1393        let mut email = AnalyzeEmails::default();
1394        email.charset = "utf-8".to_string();
1395        email.files = object! {};
1396
1397        let content_types = [
1398            "application/octet-stream",
1399            "application/zip",
1400            "application/pdf",
1401            "image/jpeg",
1402            "image/png",
1403            "image/gif",
1404            "application/ics",
1405            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
1406            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
1407            "application/vnd.ms-excel",
1408        ];
1409        let body = b"Hello File".to_vec();
1410        let md5 = br_crypto::md5::encrypt_hex(&body);
1411
1412        for (idx, content_type) in content_types.iter().enumerate() {
1413            let filename = format!("{}-{idx}.bin", unique_token("app-attachment"));
1414            let part = format!(
1415                "Content-Type: {content_type}; name=\"{filename}\"\r\nContent-Transfer-Encoding: base64\r\n\r\nSGVsbG8gRmlsZQ=="
1416            );
1417
1418            email.parts(part, "raw".to_string()).unwrap();
1419
1420            let entry = &email.files[md5.as_str()];
1421            assert_eq!(entry["name"].as_str().unwrap(), filename);
1422            assert_eq!(entry["content-type"].as_str().unwrap(), *content_type);
1423
1424            let path = entry["file"].as_str().unwrap();
1425            assert_eq!(fs::read(path).unwrap(), body);
1426            let _ = fs::remove_file(path);
1427        }
1428    }
1429
1430    #[test]
1431    fn test_parts_unknown_content_type_debug_writes_file() {
1432        let mut email = AnalyzeEmails::default();
1433        email.debug = true;
1434        email.md5 = unique_token("content-type");
1435        email.charset = "utf-8".to_string();
1436        email.files = object! {};
1437
1438        let path = env::current_dir()
1439            .unwrap()
1440            .join(format!("content_type-{}.eml", email.md5));
1441        let _ = fs::remove_file(&path);
1442
1443        let part = "Content-Type: application/x-custom\r\n\
1444Content-Transfer-Encoding: 7bit\r\n\
1445\r\n\
1446custom body"
1447            .to_string();
1448
1449        email
1450            .parts(part, "raw-unknown-content-type".to_string())
1451            .unwrap();
1452        assert!(path.exists());
1453        assert_eq!(
1454            fs::read_to_string(&path).unwrap(),
1455            "raw-unknown-content-type"
1456        );
1457
1458        let _ = fs::remove_file(path);
1459    }
1460
1461    #[test]
1462    fn test_encoded_line_paths() {
1463        let mut email = AnalyzeEmails::default();
1464
1465        assert_eq!(email.encoded_line("plain text"), "plain text");
1466        assert_eq!(email.encoded_line("=?UTF-8?Q?=48=65=6C=6C=6F?="), "Hello");
1467        assert_eq!(
1468            email.encoded_line("=?UTF-8?X?UnknownEncoding?="),
1469            "UnknownEncoding"
1470        );
1471    }
1472
1473    #[test]
1474    fn test_datetime_parsing_variants_and_error() {
1475        let mut email = AnalyzeEmails::default();
1476
1477        email
1478            .datetime("Mon, 01 Jan 2024 12:00:00 GMT (UTC)")
1479            .unwrap();
1480        assert!(email.timestamp > 0);
1481        assert!(!email.datetime.is_empty());
1482
1483        let err = email.datetime("invalid datetime format").unwrap_err();
1484        assert!(err.to_string().contains("时间解析失败"));
1485    }
1486
1487
1488    #[test]
1489    fn test_datetime_us_style_month_first() {
1490        let mut email = AnalyzeEmails::default();
1491        // 美式日期格式: Mon dd YYYY (如 "Nov 4 2025 22:19:26 +0800")
1492        email
1493            .datetime("Mon, Nov 4 2025 22:19:26 +0800")
1494            .unwrap();
1495        assert!(email.timestamp > 0);
1496        assert!(!email.datetime.is_empty());
1497        // 标准格式仍然正常工作
1498        email
1499            .datetime("Tue, 15 Oct 2024 08:30:00 +0000")
1500            .unwrap();
1501        assert!(email.timestamp > 0);
1502    }
1503    #[test]
1504    fn test_email_encoded_with_encoded_and_quoted_names() {
1505        let mut email = AnalyzeEmails::default();
1506        let result = email.email_encoded(
1507            "\"Quoted User\" <quoted@example.com>, =?UTF-8?B?5rWL6K+V?= <encoded@example.com>",
1508        );
1509
1510        assert_eq!(result.get("quoted@example.com").unwrap(), "Quoted User");
1511        assert_eq!(result.get("encoded@example.com").unwrap(), "测试");
1512    }
1513
1514    #[test]
1515    fn test_set_files_base64_decodes_and_detects_extension() {
1516        let mut email = AnalyzeEmails::default();
1517        email.files = object! {};
1518
1519        let filename = format!("{}.txt", unique_token("set-files"));
1520        email
1521            .set_files(
1522                ContentTransferEncoding::Base64,
1523                "c2V0IGZpbGVzIGJvZHk=\r\n",
1524                filename.as_str(),
1525                "".to_string(),
1526            )
1527            .unwrap();
1528
1529        let body = b"set files body".to_vec();
1530        let md5 = br_crypto::md5::encrypt_hex(&body);
1531        let entry = &email.files[md5.as_str()];
1532
1533        assert_eq!(entry["name"].as_str().unwrap(), filename);
1534        assert_eq!(entry["content-type"].as_str().unwrap(), "txt");
1535        let path = entry["file"].as_str().unwrap();
1536        assert_eq!(fs::read(path).unwrap(), body);
1537
1538        let _ = fs::remove_file(path);
1539    }
1540
1541    #[test]
1542    fn test_header_parses_colon_without_space_separator() {
1543        let email_data = b"From:sender@example.com\r\n\
1544To:receiver@example.com\r\n\
1545X-Test:nospaceval\r\n\
1546Content-Type:text/plain\r\n\
1547Date: 01 Jan 2024 12:00:00 +0000\r\n\
1548\r\n\
1549body"
1550            .to_vec();
1551
1552        let result = AnalyzeEmails::new(email_data, false).unwrap();
1553        assert_eq!(result.header.get("x-test").unwrap(), "nospaceval");
1554    }
1555
1556    #[test]
1557    fn test_multipart_header_without_boundary_keeps_boundary_empty() {
1558        let email_data = b"From: sender@example.com\r\n\
1559To: receiver@example.com\r\n\
1560Subject: Multipart Without Boundary\r\n\
1561Content-Type: multipart/mixed; charset=utf-8\r\n\
1562Date: 01 Jan 2024 12:00:00 +0000\r\n\
1563\r\n\
1564body without multipart markers"
1565            .to_vec();
1566
1567        let result = AnalyzeEmails::new(email_data, false).unwrap();
1568        assert_eq!(result.content_type, "multipart/mixed");
1569        assert!(result.boundary.is_empty());
1570    }
1571
1572    #[test]
1573    fn test_body_multipart_boundary_not_found_uses_original_body() {
1574        let boundary = unique_token("missing-boundary");
1575        let email_data = format!(
1576            "From: sender@example.com\r\n\
1577To: receiver@example.com\r\n\
1578Subject: Boundary Missing In Body\r\n\
1579Content-Type: multipart/mixed;boundary=\"{boundary}\";charset=\"utf-8\"\r\n\
1580Content-Transfer-Encoding: 7bit\r\n\
1581Date: 01 Jan 2024 12:00:00 +0000\r\n\
1582\r\n\
1583this body intentionally has no boundary lines"
1584        )
1585        .into_bytes();
1586
1587        let result = AnalyzeEmails::new(email_data, false).unwrap();
1588        assert_eq!(result.content_type, "multipart/mixed");
1589        assert_eq!(result.boundary, boundary);
1590        assert!(result.body_text.is_empty());
1591        assert!(result.body_html.is_empty());
1592    }
1593
1594    #[test]
1595    fn test_body_multipart_skips_empty_part_segments() {
1596        let boundary = unique_token("empty-part");
1597        let email_data = format!(
1598            "From: sender@example.com\r\n\
1599To: receiver@example.com\r\n\
1600Subject: Empty Multipart Segment\r\n\
1601Content-Type: multipart/mixed; boundary=\"{boundary}\"\r\n\
1602Date: 01 Jan 2024 12:00:00 +0000\r\n\
1603\r\n\
1604--{boundary}\r\n\
1605Content-Type: text/plain\r\n\
1606Content-Transfer-Encoding: 7bit\r\n\
1607\r\n\
1608first text\r\n\
1609--{boundary}\r\n\
1610\r\n\
1611--{boundary}--\r\n"
1612        )
1613        .into_bytes();
1614
1615        let result = AnalyzeEmails::new(email_data, false).unwrap();
1616        assert_eq!(result.content_type, "multipart/mixed");
1617        assert!(result.body_text.contains("first text"));
1618    }
1619
1620    #[test]
1621    fn test_body_unknown_content_type_skips_gracefully() {
1622        let email_data = b"From: a@b.com\r\n\
1623Content-Type: application/json\r\n\
1624Date: 01 Jan 2024 12:00:00 +0000\r\n\
1625\r\n\
1626{\"key\":\"value\"}"
1627            .to_vec();
1628        let result = AnalyzeEmails::new(email_data, false);
1629        assert!(result.is_ok());
1630        let email = result.unwrap();
1631        assert!(email.body_text.is_empty());
1632        assert!(email.body_html.is_empty());
1633    }
1634
1635    #[test]
1636    fn test_parts_ignores_header_line_without_colon() {
1637        let mut email = AnalyzeEmails::default();
1638        email.charset = "utf-8".to_string();
1639        email.files = object! {};
1640
1641        let part = "NoColonHeader\r\n\
1642Content-Type: text/plain\r\n\
1643Content-Transfer-Encoding: 7bit\r\n\
1644\r\n\
1645plain body"
1646            .to_string();
1647
1648        email.parts(part, "raw".to_string()).unwrap();
1649        assert_eq!(email.body_text, "plain body");
1650    }
1651
1652    #[test]
1653    fn test_parts_content_type_boundary_with_semicolon_suffix() {
1654        let mut email = AnalyzeEmails::default();
1655        email.charset = "utf-8".to_string();
1656        email.files = object! {};
1657
1658        let boundary = unique_token("inner-semi");
1659        let part = format!(
1660            "Content-Type: multipart/alternative; boundary=\"{boundary}\"; charset=\"utf-8\"\r\n\
1661Content-Transfer-Encoding: 7bit\r\n\
1662\r\n\
1663--{boundary}\r\n\
1664Content-Type: text/plain\r\n\
1665Content-Transfer-Encoding: 7bit\r\n\
1666\r\n\
1667nested plain body\r\n\
1668--{boundary}--\r\n"
1669        );
1670
1671        email.parts(part, "raw".to_string()).unwrap();
1672        assert!(email.body_text.contains("nested plain body"));
1673    }
1674
1675    #[test]
1676    fn test_parts_text_html_without_filename_sets_body_html() {
1677        let mut email = AnalyzeEmails::default();
1678        email.charset = "utf-8".to_string();
1679        email.files = object! {};
1680
1681        let part = "Content-Type: text/html\r\n\
1682Content-Transfer-Encoding: 7bit\r\n\
1683\r\n\
1684<p>inline html body</p>"
1685            .to_string();
1686
1687        email.parts(part, "raw".to_string()).unwrap();
1688        assert!(email.body_html.contains("<p>inline html body</p>"));
1689    }
1690
1691    #[test]
1692    fn test_parts_nested_multipart_outer_boundary_not_found_skips_empty_part() {
1693        let mut email = AnalyzeEmails::default();
1694        email.charset = "utf-8".to_string();
1695        email.files = object! {};
1696        email.boundary = unique_token("outer-boundary");
1697
1698        let inner_boundary = unique_token("inner-boundary");
1699        let part = format!(
1700            "Content-Type: multipart/alternative; boundary=\"{inner_boundary}\"\r\n\
1701Content-Transfer-Encoding: 7bit\r\n\
1702\r\n\
1703--{inner_boundary}\r\n\
1704Content-Type: text/plain\r\n\
1705Content-Transfer-Encoding: 7bit\r\n\
1706\r\n\
1707nested plain text\r\n\
1708--{inner_boundary}\r\n\
1709\r\n\
1710--{inner_boundary}--\r\n"
1711        );
1712
1713        email.parts(part, "raw".to_string()).unwrap();
1714        assert!(email.body_text.contains("nested plain text"));
1715    }
1716
1717    #[test]
1718    fn test_set_files_returns_error_when_create_fails() {
1719        let mut email = AnalyzeEmails::default();
1720        email.files = object! {};
1721
1722        let filename = "test\0file.txt";
1723
1724        let err = email
1725            .set_files(
1726                ContentTransferEncoding::Bit7,
1727                "ignored",
1728                filename,
1729                "application/octet-stream".to_string(),
1730            )
1731            .unwrap_err();
1732
1733        assert!(err.to_string().contains("打开(创建)临时文件"));
1734    }
1735
1736    #[test]
1737    fn test_content_transfer_encoding_decode_quoted_printable() {
1738        let mut enc = ContentTransferEncoding::QuotedPrintable;
1739        let data = b"Hello=20World=21".to_vec();
1740        let result = enc.decode(data).unwrap();
1741        assert_eq!(result, b"Hello World!");
1742    }
1743}