Skip to main content

br_email/
analyze.rs

1use br_crypto::encoding::code_to_utf8;
2use chrono::{DateTime, Local, TimeZone};
3use json::{object, JsonValue};
4use regex::Regex;
5use std::collections::HashMap;
6use std::ffi::OsStr;
7use std::io::{Error, Write};
8use std::{env, fs, io};
9
/// Upper bound for the `depth` counter of [`AnalyzeEmails`]; once the counter reaches
/// this value, `parts` logs a warning and skips the part instead of descending further.
const MAX_DEPTH: usize = 10;
11
/// Parsed representation of a single email message.
///
/// Built by `AnalyzeEmails::new` / `AnalyzeEmails::new_with_temp_dir`, which split the raw
/// bytes into a header section and a body and fill the fields below.
#[derive(Debug)]
pub struct AnalyzeEmails {
    /// When true, messages that fail certain parsing steps are dumped as `.eml`
    /// files into the current working directory.
    pub debug: bool,
    /// Headers that are not handled explicitly, keyed by lower-cased header name.
    pub header: HashMap<String, String>,
    /// Value of the `MIME-Version` header.
    pub mime_version: String,
    /// Boundary string of the top-level multipart body (empty when not multipart).
    boundary: String,
    /// Hex MD5 digest of the raw input bytes.
    pub md5: String,
    /// Length of the raw input in bytes.
    pub size: usize,
    /// Unix timestamp parsed from the `Date` header.
    pub timestamp: i64,
    /// `Date` header rendered in the local time zone as `%Y-%m-%d %H:%M:%S`.
    pub datetime: String,
    /// Decoded subject.
    pub subject: String,
    /// Sender (`From` header), stored as address -> display name.
    pub from: HashMap<String, String>,
    /// Recipients (`To` header), stored as address -> display name.
    pub to: HashMap<String, String>,
    /// Carbon-copy recipients (`Cc` header), stored as address -> display name.
    pub cc: HashMap<String, String>,
    /// Addresses that replies should be sent to (`Reply-To` header).
    pub replyto: HashMap<String, String>,
    /// Top-level content type, lower-cased and without parameters.
    pub content_type: String,
    /// Top-level content transfer encoding.
    pub content_transfer_encoding: ContentTransferEncoding,
    /// Actual sender (`Sender` header), stored verbatim.
    pub sender: String,
    /// Decoded plain-text body.
    pub body_text: String,
    /// Decoded HTML body.
    pub body_html: String,
    /// Extracted attachments: a JSON object keyed by the attachment's MD5, each entry
    /// holding `name`, `md5`, `size`, `content-type` and `file` (path on disk).
    pub files: JsonValue,
    /// Character set taken from the top-level `Content-Type` header.
    pub charset: String,
    /// Top-level attachment file name (from `Content-Type name=` or
    /// `Content-Disposition filename=`).
    content_filename: String,
    /// Counter compared against `MAX_DEPTH` while descending into MIME parts
    /// (guards against stack overflow caused by maliciously nested messages).
    depth: usize,
    /// Directory attachments are written to (defaults to `env::temp_dir()`; can be
    /// customised through `new_with_temp_dir`).
    temp_dir: Option<std::path::PathBuf>,
}
52
53impl AnalyzeEmails {
54    pub fn new(data: Vec<u8>, debug: bool) -> io::Result<AnalyzeEmails> {
55        Self::init(data, debug, None)
56    }
57
58    /// Create with custom temp directory for attachment extraction.
59    /// Uses md5-prefixed filenames to avoid collisions between parallel parses.
60    pub fn new_with_temp_dir(data: Vec<u8>, debug: bool, temp_dir: std::path::PathBuf) -> io::Result<AnalyzeEmails> {
61        Self::init(data, debug, Some(temp_dir))
62    }
63
64    fn init(mut data: Vec<u8>, debug: bool, temp_dir: Option<std::path::PathBuf>) -> io::Result<AnalyzeEmails> {
65        let md5 = br_crypto::md5::encrypt_hex(&data.clone()).to_string();
66        let size = data.len();
67        let data_string = String::from_utf8_lossy(&data).to_string();
68        if data_string.contains("\n\n") {
69            let updated_string = data_string.replace("\n", "\r\n");
70            data = updated_string.as_bytes().to_vec();
71        }
72
73        let subsequence = "\r\n\r\n".as_bytes();
74
75        let (header, body) = match data
76            .windows(subsequence.len())
77            .position(|window| window == subsequence)
78        {
79            None => {
80                if debug {
81                    fs::write(
82                        format!(
83                            "{}/xygs-{}.eml",
84                            env::current_dir()
85                                .unwrap_or_default()
86                                .to_str()
87                                .unwrap_or("."),
88                            md5
89                        ),
90                        data.clone(),
91                    )?;
92                }
93                return Err(Error::other(format!("协议格式错误: {md5}")));
94            }
95            Some(e) => (data[..e].to_vec(), data[e + 4..].to_vec()),
96        };
97        let mut that = Self {
98            debug,
99            header: Default::default(),
100            mime_version: "".to_string(),
101            boundary: "".to_string(),
102            md5,
103            size,
104            timestamp: 0,
105            subject: "".to_string(),
106            from: Default::default(),
107            to: Default::default(),
108            cc: Default::default(),
109            replyto: Default::default(),
110            datetime: "".to_string(),
111            content_type: "".to_string(),
112            content_transfer_encoding: ContentTransferEncoding::Bit7,
113            sender: "".to_string(),
114            body_text: "".to_string(),
115            body_html: "".to_string(),
116            files: object! {},
117            charset: "utf-8".to_string(),
118            content_filename: "".to_string(),
119            depth: 0,
120            temp_dir,
121        };
122        that.header(header)?;
123        if that.content_type.is_empty() {
124            that.content_type = "text/plain".to_string();
125        }
126        that.body(body, data_string)?;
127        Ok(that)
128    }
129
130    fn header(&mut self, data: Vec<u8>) -> io::Result<()> {
131        let data = String::from_utf8_lossy(&data).to_string();
132        let data = data.replace("\r\n\t", "").replace("\r\n ", " ");
133        for item in data.lines() {
134            let (key, value) = match item.find(": ") {
135                Some(e) => (item[..e].to_string(), item[e + 2..].to_string()),
136                None => match item.find(":") {
137                    Some(e) => (item[..e].to_string(), item[e + 1..].to_string()),
138                    None => continue,
139                },
140            };
141            let name = key.to_lowercase();
142            if value.is_empty() {
143                continue;
144            }
145            match key.to_lowercase().as_str() {
146                "mime-version" => self.mime_version = value.to_string(),
147                "from" => {
148                    self.from = self.from(&value);
149                }
150                "sender" => {
151                    self.sender = value.to_string();
152                }
153                "to" => {
154                    self.to = self.email_encoded(&value);
155                }
156                "cc" => {
157                    self.cc = self.email_encoded(&value);
158                }
159                "reply-to" => {
160                    self.replyto = self.email_encoded(&value);
161                }
162                "subject" => {
163                    self.subject = self.subject(value.to_string());
164                }
165                "content-type" => {
166                    let types = value.split(";").collect::<Vec<&str>>();
167                    self.content_type = types[0].trim().to_lowercase().to_string();
168                    match self.content_type.as_str() {
169                        "multipart/mixed"
170                        | "multipart/alternative"
171                        | "multipart/related"
172                        | "multipart/report"
173                        | "multipart/encrypted"
174                        | "multipart/digest" => match types[1].find("boundary=") {
175                            None => {}
176                            Some(e) => {
177                                let boundary = &types[1][e..];
178                                self.boundary = boundary
179                                    .trim()
180                                    .trim_start_matches("boundary=")
181                                    .trim_start_matches("\"")
182                                    .trim_end_matches("\"")
183                                    .to_string();
184                            }
185                        },
186                        _ => {}
187                    }
188                    if types.len() > 1 {
189                        for item in types.iter() {
190                            let trimmed = item.trim();
191                            if trimmed.contains("charset=") {
192                                self.charset = trimmed
193                                    .split("charset=")
194                                    .last()
195                                    .unwrap_or("")
196                                    .trim_start_matches('"')
197                                    .trim_end_matches('"')
198                                    .to_string();
199                            }
200                            if trimmed.starts_with("name=") {
201                                self.content_filename =
202                                    self.encoded(trimmed.trim_start_matches("name="));
203                            }
204                        }
205                    }
206                }
207                "content-transfer-encoding" => {
208                    self.content_transfer_encoding = ContentTransferEncoding::from(&value);
209                }
210                "date" => self.datetime(&value)?,
211                "content-disposition" => {
212                    if self.content_filename.is_empty() && value.contains("filename=") {
213                        self.content_filename = value.split("filename=").collect::<Vec<&str>>()[1]
214                            .trim_start_matches('"')
215                            .trim_end_matches('"')
216                            .to_string();
217                    }
218                    if self.content_filename.is_empty() && value.contains("filename*=utf-8''") {
219                        self.content_filename =
220                            value.split("filename*=utf-8''").collect::<Vec<&str>>()[1]
221                                .trim_start_matches('"')
222                                .trim_end_matches('"')
223                                .to_string();
224                        self.content_filename =
225                            br_crypto::encoding::urlencoding_decode(self.content_filename.as_str());
226                    }
227                }
228                _ => {
229                    self.header
230                        .insert(name.trim().to_string(), value.to_string());
231                }
232            }
233        }
234        Ok(())
235    }
236    fn body(&mut self, data: Vec<u8>, old_data: String) -> io::Result<()> {
237        match self.content_type.to_lowercase().as_str() {
238            "text/html" => {
239                let data = self.content_transfer_encoding.decode(data)?;
240                let res = code_to_utf8(self.charset.as_str(), data.clone());
241                self.body_html = res;
242            }
243            "text/plain" => {
244                let data = self.content_transfer_encoding.decode(data)?;
245                let res = code_to_utf8(self.charset.as_str(), data.clone());
246                self.body_text = res;
247            }
248            "multipart/mixed"
249            | "multipart/alternative"
250            | "multipart/related"
251            | "multipart/report"
252            | "multipart/signed"
253            | "multipart/encrypted"
254            | "multipart/digest" => {
255                let data = self.content_transfer_encoding.decode(data.clone())?;
256                let mut parts = code_to_utf8(self.charset.as_str(), data.clone());
257                let mut parts_list = vec![];
258                let mut text = String::new();
259
260                parts = match parts.find(self.boundary.as_str()) {
261                    None => parts,
262                    Some(e) => parts[e..].to_string(),
263                };
264                for item in parts.lines() {
265                    if item.contains(self.boundary.as_str()) && text.is_empty() {
266                        continue;
267                    }
268                    if item.contains(self.boundary.as_str()) && !text.is_empty() {
269                        parts_list.push(text.clone());
270                        text = String::new();
271                        continue;
272                    }
273                    text.push_str(item);
274                    text.push_str("\r\n");
275                }
276                for part in parts_list {
277                    if part.trim().is_empty() {
278                        continue;
279                    }
280                    self.parts(part.to_string(), old_data.clone())?;
281                }
282            }
283            _ => {
284                if !self.content_filename.is_empty() {
285                    let data_str = String::from_utf8_lossy(&data).to_string();
286                    let encoding = self.content_transfer_encoding.clone();
287                    let filename = self.content_filename.clone();
288                    let ct = self.content_type.clone();
289                    self.set_files(encoding, &data_str, &filename, ct)?;
290                } else {
291                    log::warn!("未知body类型: {}, 已跳过", self.content_type);
292                }
293            }
294        }
295        Ok(())
296    }
297    /// 部分内容处理
298    fn parts(&mut self, data: String, old_data: String) -> io::Result<()> {
299        if self.depth >= MAX_DEPTH {
300            log::warn!("递归深度超过限制 {}, 已跳过", MAX_DEPTH);
301            return Ok(());
302        }
303        self.depth += 1;
304        let (header_str, body) = match data.find("\r\n\r\n") {
305            None => {
306                if self.debug {
307                    fs::write(
308                        format!(
309                            "{}/head-{}.eml",
310                            env::current_dir()
311                                .unwrap_or_default()
312                                .to_str()
313                                .unwrap_or("."),
314                            self.md5
315                        ),
316                        old_data.clone(),
317                    )?;
318                }
319                return Err(Error::other("解析附件头失败"));
320            }
321            Some(e) => (
322                data[..e].replace("\r\n\t", " ").replace("\r\n ", " "),
323                &data[e + 4..],
324            ),
325        };
326
327        let mut filename = "".to_string();
328        let mut content_type = String::new();
329        let mut boundary = String::new();
330        let mut content_transfer_encoding = ContentTransferEncoding::None;
331        for item in header_str.lines() {
332            let (key, value) = match item.find(": ") {
333                Some(e) => (&item[..e], &item[e + 2..]),
334                None => match item.find(":") {
335                    Some(e) => (&item[..e], &item[e + 1..]),
336                    None => continue,
337                },
338            };
339
340            let name = key.to_lowercase();
341
342            match name.trim() {
343                "content-transfer-encoding" => {
344                    content_transfer_encoding = ContentTransferEncoding::from(value)
345                }
346                "content-type" => {
347                    let types = value.trim().split(";").collect::<Vec<&str>>();
348                    content_type = types[0].trim().to_string();
349                    let name = types
350                        .iter()
351                        .filter(|&x| x.trim().starts_with("name="))
352                        .map(|&x| x.trim().to_string())
353                        .collect::<Vec<String>>();
354                    if !name.is_empty() {
355                        let name = name[0].trim_start_matches("name=");
356                        filename = self.encoded(name);
357                    }
358                    match value.find("boundary=") {
359                        None => {}
360                        Some(i) => {
361                            let mut b = &value[i + 9..];
362                            b = match b.find(";") {
363                                None => b,
364                                Some(i) => &b[..i],
365                            };
366                            boundary = b
367                                .trim_start_matches("\"")
368                                .trim_end_matches("\"")
369                                .to_string();
370                        }
371                    }
372                }
373                "content-id"
374                | "content-length"
375                | "mime-version"
376                | "content-description"
377                | "date"
378                | "x-attachment-id"
379                | "x-attachment-content-disposition" => {}
380                "content-disposition" => {
381                    if filename.is_empty() && value.contains("filename=") {
382                        filename = value.split("filename=").collect::<Vec<&str>>()[1]
383                            .trim_start_matches("\"")
384                            .trim_end_matches("\"")
385                            .to_string();
386                    }
387                    if filename.is_empty() && value.contains("filename*=utf-8''") {
388                        filename = value.split("filename*=utf-8''").collect::<Vec<&str>>()[1]
389                            .trim_start_matches("\"")
390                            .trim_end_matches("\"")
391                            .to_string();
392                        filename = br_crypto::encoding::urlencoding_decode(filename.as_str());
393                    }
394                }
395                _ => {
396                    log::debug!("parts 忽略未知 header: {name} [{item}]");
397                }
398            }
399        }
400
401        match content_type.as_str() {
402            "text/plain" => {
403                if filename.is_empty() {
404                    let res = content_transfer_encoding.decode(body.as_bytes().to_vec())?;
405                    let text = code_to_utf8(self.charset.as_str(), res.clone());
406                    self.body_text = text;
407                } else {
408                    self.set_files(
409                        content_transfer_encoding,
410                        body,
411                        filename.as_str(),
412                        "".to_string(),
413                    )?;
414                }
415            }
416            "text/html" | "text/x-amp-html" => {
417                if filename.is_empty() {
418                    let res = content_transfer_encoding.decode(body.as_bytes().to_vec())?;
419                    self.body_html = code_to_utf8(self.charset.as_str(), res.clone());
420                } else {
421                    self.set_files(
422                        content_transfer_encoding,
423                        body,
424                        filename.as_str(),
425                        "".to_string(),
426                    )?;
427                }
428            }
429            "multipart/mixed" | "multipart/alternative" | "multipart/related" | "multipart/report" | "multipart/signed" | "multipart/encrypted" | "multipart/digest" => {
430                let data = self
431                    .content_transfer_encoding
432                    .decode(body.as_bytes().to_vec())?;
433                let mut parts = code_to_utf8(self.charset.as_str(), data.clone());
434
435                parts = match parts.find(boundary.as_str()) {
436                    None => parts,
437                    Some(e) => parts[e..].to_string(),
438                };
439
440                let mut parts_list = vec![];
441                let mut text = String::new();
442                for item in parts.lines() {
443                    if item.contains(&boundary) && text.is_empty() {
444                        continue;
445                    }
446                    if item.contains(&boundary) && !text.is_empty() {
447                        parts_list.push(text);
448                        text = String::new();
449                        continue;
450                    }
451                    text.push_str(item);
452                    text.push_str("\r\n");
453                }
454                for part in parts_list {
455                    if part.trim().is_empty() {
456                        continue;
457                    }
458                    self.parts(part.to_string(), old_data.clone())?;
459                }
460            }
461            "text/calendar" | "message/delivery-status" | "message/disposition-notification" => {}
462            "message/rfc822" => {
463                let data = content_transfer_encoding.decode(body.as_bytes().to_vec())?;
464                let text = code_to_utf8(self.charset.as_str(), data);
465                match AnalyzeEmails::new(text.into_bytes(), self.debug) {
466                    Ok(nested) => {
467                        if self.body_text.is_empty() {
468                            self.body_text = nested.body_text;
469                        }
470                        if self.body_html.is_empty() {
471                            self.body_html = nested.body_html;
472                        }
473                        for (k, v) in nested.files.entries() {
474                            self.files[k] = v.clone();
475                        }
476                    }
477                    Err(e) => {
478                        log::warn!("解析嵌套邮件 message/rfc822 失败: {}", e);
479                    }
480                }
481            }
482            "application/octet-stream"
483            | "application/zip"
484            | "application/pdf"
485            | "image/jpeg"
486            | "image/png"
487            | "image/gif"
488            | "application/ics"
489            | "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
490            | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
491            | "application/vnd.ms-excel" => {
492                if !filename.is_empty() {
493                    self.set_files(
494                        content_transfer_encoding,
495                        body,
496                        filename.as_str(),
497                        content_type.to_string(),
498                    )?;
499                }
500            }
501            _ => {
502                if !filename.is_empty() {
503                    self.set_files(
504                        content_transfer_encoding,
505                        body,
506                        filename.as_str(),
507                        content_type.to_string(),
508                    )?;
509                } else {
510                    if self.debug {
511                        fs::write(
512                            format!(
513                                "{}/content_type-{}.eml",
514                                env::current_dir()
515                                    .unwrap_or_default()
516                                    .to_str()
517                                    .unwrap_or("."),
518                                self.md5
519                            ),
520                            old_data.clone(),
521                        )?;
522                    }
523                    log::warn!(
524                        "未知 parts content_type 类型: {}, 无文件名已跳过",
525                        content_type
526                    );
527                }
528            }
529        }
530        Ok(())
531    }
532    pub fn from(&mut self, value: &str) -> HashMap<String, String> {
533        let mut r = value
534            .split("<")
535            .filter(|x| !x.trim().is_empty())
536            .map(|x| x.trim())
537            .collect::<Vec<&str>>();
538        if r[0].starts_with("\"") && r[0].ends_with("\"") {
539            r[0] = r[0].trim_start_matches("\"").trim_end_matches("\"").trim();
540        }
541        let mut emails = HashMap::new();
542        if r.len() == 1 {
543            let name = r[0].trim_end_matches(">").to_string();
544            emails.insert(name.clone(), name);
545        } else {
546            let name = self.encoded(r[0].trim());
547            let email = r[1].trim_end_matches(">").to_string();
548            emails.insert(email, name);
549        }
550        emails
551    }
552    fn subject(&mut self, value: String) -> String {
553        let value = value.replace("?==?", "?=\r\n\t=?");
554        if !value.contains("=?") && !value.contains("?=") {
555            return value.to_string();
556        }
557        let list = value.split("\r\n\t").collect::<Vec<&str>>();
558        let mut txt = vec![];
559        for item in list {
560            txt.push(self.encoded(item));
561        }
562        txt.join("")
563    }
564
565    fn encoded(&mut self, value: &str) -> String {
566        let t = value.trim_start_matches("\"").trim_end_matches("\"");
567        if t.contains("=?") && t.contains("?=") {
568            let l = t.split(" ").collect::<Vec<&str>>();
569            let mut txt = vec![];
570            for item in l {
571                txt.push(self.encoded_line(item));
572            }
573            txt.join("")
574        } else {
575            t.to_string()
576        }
577    }
578    /// 段落解码
579    fn encoded_line(&mut self, value: &str) -> String {
580        let line = value.split("?").collect::<Vec<&str>>();
581        if line.len() < 4 {
582            return value.to_string();
583        }
584        let charset = line[1].to_lowercase();
585        let code = line[2].to_uppercase();
586        let data = line[3];
587
588        let strs = match code.as_str() {
589            "B" => br_crypto::base64::decode_u8(data),
590            "Q" => br_crypto::qp::decode(data).unwrap_or(vec![]),
591            _ => data.as_bytes().to_vec(),
592        };
593        let text = code_to_utf8(&charset, strs.clone());
594        text.chars().filter(|&x| x != '\u{200b}').collect()
595    }
596
597    /// 时间处理
598    fn datetime(&mut self, value: &str) -> io::Result<()> {
599        let re =
600            Regex::new(r"\s*\(.*\)$").map_err(|e| Error::other(format!("正则表达式错误: {e}")))?;
601        let datetime = re.replace(value, "").to_string();
602        let datetime = datetime.replace("GMT", "+0000").to_string();
603        let datetime = match datetime.find(",") {
604            None => datetime,
605            Some(i) => datetime[i + 1..].trim().to_string(),
606        };
607        // 归一化月份缩写大小写 ("NOV" -> "Nov", "nov" -> "Nov")
608        let datetime = datetime
609            .split_whitespace()
610            .map(|word| {
611                if word.len() == 3 && word.chars().all(|c| c.is_ascii_alphabetic()) {
612                    let mut chars = word.chars();
613                    match chars.next() {
614                        Some(f) => {
615                            format!("{}{}", f.to_uppercase(), chars.as_str().to_lowercase())
616                        }
617                        None => word.to_string(),
618                    }
619                } else {
620                    word.to_string()
621                }
622            })
623            .collect::<Vec<_>>()
624            .join(" ");
625        // 尝试多种日期格式:标准 RFC 2822 (dd Mon YYYY) 和美式 (Mon dd YYYY)
626        let formats = ["%d %b %Y %H:%M:%S %z", "%b %d %Y %H:%M:%S %z"];
627        let datetime = formats
628            .iter()
629            .find_map(|fmt| DateTime::parse_from_str(datetime.as_str(), fmt).ok())
630            .ok_or_else(|| {
631                Error::other(format!("时间解析失败: 所有格式均不匹配 [{datetime:?}]"))
632            })?;
633
634        self.timestamp = datetime.timestamp();
635        self.datetime = Local
636            .timestamp_opt(self.timestamp, 0)
637            .single()
638            .map(|dt| {
639                dt.with_timezone(&Local)
640                    .format("%Y-%m-%d %H:%M:%S")
641                    .to_string()
642            })
643            .unwrap_or_default();
644        Ok(())
645    }
646    pub fn email_encoded(&mut self, value: &str) -> HashMap<String, String> {
647        let list = value.split(",").map(|x| x.trim()).collect::<Vec<&str>>();
648        let mut emails = HashMap::new();
649        for item in list {
650            let mut r = item.split(" <").collect::<Vec<&str>>();
651            if r[0].starts_with("\"") && r[0].ends_with("\"") {
652                r[0] = r[0].trim_start_matches("\"").trim_end_matches("\"");
653            }
654            if r.len() == 1 {
655                let name = r[0]
656                    .trim_start_matches("<")
657                    .trim_end_matches(">")
658                    .to_string();
659                emails.insert(name.clone(), name);
660            } else {
661                let name = self.encoded(r[0].trim());
662                let email = r[1].trim_end_matches(">").to_string();
663                emails.insert(email, name);
664            }
665        }
666        emails
667    }
668    fn set_files(
669        &mut self,
670        mut content_transfer_encoding: ContentTransferEncoding,
671        body: &str,
672        filename: &str,
673        mut content_type: String,
674    ) -> io::Result<()> {
675        // 文件名安全处理:移除路径穿越字符,只保留文件名部分
676        let filename = filename
677            .replace("\\", "/")
678            .split('/')
679            .next_back()
680            .unwrap_or(filename)
681            .replace("..", "")
682            .trim_start_matches('.')
683            .to_string();
684        let filename = if filename.is_empty() { "unnamed".to_string() } else { filename };
685        let filename = filename.as_str();
686        let mut data_str = String::new();
687        if let ContentTransferEncoding::Base64 = content_transfer_encoding {
688            let mut text = "".to_string();
689            for line in body.lines() {
690                text += line;
691            }
692            data_str = text;
693        }
694
695        let body = content_transfer_encoding.decode(data_str.as_bytes().to_vec())?;
696        let md5 = br_crypto::md5::encrypt_hex(&body.clone());
697        let size = body.len();
698        // Use custom temp_dir if set, otherwise system temp; prefix filename with md5 to avoid collisions
699        let base_dir = self.temp_dir.clone().unwrap_or_else(env::temp_dir);
700        let safe_filename = format!("{md5}_{filename}");
701        let mut file_path = base_dir.clone();
702        file_path.push(&safe_filename);
703        let path_temp_dir = file_path.clone();
704        // Ensure the directory exists (for custom temp_dir)
705        if let Some(parent) = file_path.parent() {
706            let _ = fs::create_dir_all(parent);
707        }
708
709        let mut temp_file = match fs::File::create(file_path.clone()) {
710            Ok(e) => e,
711            Err(e) => {
712                return Err(Error::other(format!(
713                    "打开(创建)临时文件: {e} [{filename}]"
714                )))
715            }
716        };
717
718        if temp_file.write(body.as_slice()).is_ok() {
719            if content_type.is_empty() {
720                content_type = path_temp_dir
721                    .extension()
722                    .unwrap_or(OsStr::new("unknown"))
723                    .to_str()
724                    .unwrap_or("unknown")
725                    .to_string();
726            }
727
728            self.files[md5.as_str()] = object! {
729                name:filename,
730                md5:md5.clone(),
731                size:size,
732                "content-type":content_type.clone(),
733                file:file_path.to_str()
734            };
735        };
736        Ok(())
737    }
738}
739
740impl Default for AnalyzeEmails {
741    fn default() -> Self {
742        Self {
743            debug: false,
744            header: Default::default(),
745            mime_version: "".to_string(),
746            boundary: "".to_string(),
747            md5: "".to_string(),
748            size: 0,
749            timestamp: 0,
750            datetime: "".to_string(),
751            subject: "".to_string(),
752            from: Default::default(),
753            to: Default::default(),
754            cc: Default::default(),
755            replyto: Default::default(),
756            content_type: "".to_string(),
757            content_transfer_encoding: ContentTransferEncoding::None,
758            sender: "".to_string(),
759            body_text: "".to_string(),
760            body_html: "".to_string(),
761            files: JsonValue::Null,
762            charset: "".to_string(),
763            content_filename: "".to_string(),
764            depth: 0,
765            temp_dir: None,
766        }
767    }
768}
769
/// Transfer encodings understood by the parser (the `Content-Transfer-Encoding` header).
///
/// Rules of thumb for which encoding a message part normally carries:
///
/// - Plain ASCII text: usually `7bit`.
/// - Text with non-ASCII characters: `quoted-printable` or `8bit`, depending on
///   the content and on compatibility requirements.
/// - Binary data (images, video, audio, ...): usually `base64`.
///
/// The type derives `PartialEq`/`Eq` so values can be compared with `==` and
/// `assert_eq!` instead of requiring `matches!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ContentTransferEncoding {
    /// Text-oriented encoding that keeps most of the content human readable and
    /// escapes non-ASCII bytes and a few special characters (such as `=`).
    /// Suited to text that is mostly ASCII with some special or non-ASCII characters.
    QuotedPrintable,
    /// Encodes binary data as ASCII using a 64-character alphabet
    /// (`A-Z`, `a-z`, `0-9`, `+`, `/`); every 3 input bytes become 4 characters.
    /// Commonly used for attachments, images, audio and video.
    Base64,
    /// Raw binary content that is transported without any escaping or encoding.
    /// Requires a transport that can carry every byte value, so it is not
    /// supported everywhere.
    Binary,
    /// Content that may contain 8-bit (non-ASCII) characters, transported as is.
    /// Only usable when the whole delivery path supports 8-bit data.
    Bit8,
    /// Pure 7-bit ASCII content (high bit of every byte is 0). The most widely
    /// supported form; suited to plain-text mail without special characters.
    Bit7,
    /// No (or an unrecognised) transfer encoding was declared.
    None,
}
795
796impl ContentTransferEncoding {
797    fn from(value: &str) -> Self {
798        match value.to_lowercase().as_str() {
799            "7bit" => Self::Bit7,
800            "8bit" => Self::Bit8,
801            "binary" => Self::Binary,
802            "base64" => Self::Base64,
803            "quoted-printable" => Self::QuotedPrintable,
804            _ => Self::None,
805        }
806    }
807    fn decode(&mut self, mut data: Vec<u8>) -> io::Result<Vec<u8>> {
808        let res = match self {
809            ContentTransferEncoding::QuotedPrintable => br_crypto::qp::decode(data)?,
810            ContentTransferEncoding::Base64 => {
811                let str = String::from_utf8_lossy(&data).to_string();
812                let mut text = "".to_string();
813                for line in str.lines() {
814                    text += line;
815                }
816                data = text.as_bytes().to_vec();
817                br_crypto::base64::decode_u8(data)
818            }
819            ContentTransferEncoding::Binary => data,
820            ContentTransferEncoding::Bit8 => data,
821            ContentTransferEncoding::Bit7 => data,
822            ContentTransferEncoding::None => data,
823        };
824        Ok(res)
825    }
826}
827
828#[cfg(test)]
829#[allow(clippy::field_reassign_with_default)]
830mod tests {
831    use super::*;
832    use std::time::{SystemTime, UNIX_EPOCH};
833    use std::{env, fs};
834
835    fn unique_token(prefix: &str) -> String {
836        let nanos = SystemTime::now()
837            .duration_since(UNIX_EPOCH)
838            .unwrap()
839            .as_nanos();
840        format!("{prefix}-{nanos}-{}", std::process::id())
841    }
842
843    fn multipart_email(content_type: &str, boundary: &str, part: &str) -> Vec<u8> {
844        format!(
845            "From: sender@example.com\r\n\
846To: receiver@example.com\r\n\
847Subject: Multipart Test\r\n\
848Content-Type: {content_type};boundary=\"{boundary}\";charset=\"utf-8\"\r\n\
849Content-Transfer-Encoding: 7bit\r\n\
850Date: Mon, 01 Jan 2024 12:00:00 GMT (UTC)\r\n\
851\r\n\
852--{boundary}\r\n\
853{part}\r\n\
854--{boundary}--\r\n"
855        )
856        .into_bytes()
857    }
858
859    #[test]
860    fn test_content_transfer_encoding_from() {
861        assert!(matches!(
862            ContentTransferEncoding::from("7bit"),
863            ContentTransferEncoding::Bit7
864        ));
865        assert!(matches!(
866            ContentTransferEncoding::from("8bit"),
867            ContentTransferEncoding::Bit8
868        ));
869        assert!(matches!(
870            ContentTransferEncoding::from("base64"),
871            ContentTransferEncoding::Base64
872        ));
873        assert!(matches!(
874            ContentTransferEncoding::from("BASE64"),
875            ContentTransferEncoding::Base64
876        ));
877        assert!(matches!(
878            ContentTransferEncoding::from("quoted-printable"),
879            ContentTransferEncoding::QuotedPrintable
880        ));
881        assert!(matches!(
882            ContentTransferEncoding::from("binary"),
883            ContentTransferEncoding::Binary
884        ));
885        assert!(matches!(
886            ContentTransferEncoding::from("unknown"),
887            ContentTransferEncoding::None
888        ));
889    }
890
891    #[test]
892    fn test_content_transfer_encoding_decode_7bit() {
893        let mut enc = ContentTransferEncoding::Bit7;
894        let data = b"Hello World".to_vec();
895        let result = enc.decode(data.clone()).unwrap();
896        assert_eq!(result, data);
897    }
898
899    #[test]
900    fn test_content_transfer_encoding_decode_8bit() {
901        let mut enc = ContentTransferEncoding::Bit8;
902        let data = "你好世界".as_bytes().to_vec();
903        let result = enc.decode(data.clone()).unwrap();
904        assert_eq!(result, data);
905    }
906
907    #[test]
908    fn test_content_transfer_encoding_decode_base64() {
909        let mut enc = ContentTransferEncoding::Base64;
910        let data = b"SGVsbG8gV29ybGQ=".to_vec();
911        let result = enc.decode(data).unwrap();
912        assert_eq!(result, b"Hello World");
913    }
914
915    #[test]
916    fn test_analyze_emails_default() {
917        let email = AnalyzeEmails::default();
918        assert!(!email.debug);
919        assert_eq!(email.size, 0);
920        assert_eq!(email.timestamp, 0);
921        assert!(email.subject.is_empty());
922        assert!(email.from.is_empty());
923        assert!(email.to.is_empty());
924    }
925
926    #[test]
927    fn test_analyze_simple_email() {
928        let email_data = b"From: sender@example.com\r\n\
929To: receiver@example.com\r\n\
930Subject: Test Subject\r\n\
931Content-Type: text/plain\r\n\
932Date: 01 Jan 2024 12:00:00 +0000\r\n\
933\r\n\
934Hello, this is a test email body."
935            .to_vec();
936
937        let result = AnalyzeEmails::new(email_data, false).unwrap();
938        assert_eq!(result.subject, "Test Subject");
939        assert_eq!(result.content_type, "text/plain");
940        assert!(result.from.contains_key("sender@example.com"));
941        assert!(result.to.contains_key("receiver@example.com"));
942        assert_eq!(result.body_text, "Hello, this is a test email body.");
943    }
944
945    #[test]
946    fn test_analyze_email_with_encoded_subject() {
947        let email_data = b"From: test@example.com\r\n\
948To: receiver@example.com\r\n\
949Subject: =?UTF-8?B?5rWL6K+V5Li76aKY?=\r\n\
950Content-Type: text/plain\r\n\
951Date: 01 Jan 2024 12:00:00 +0000\r\n\
952\r\n\
953Test body"
954            .to_vec();
955
956        let result = AnalyzeEmails::new(email_data, false).unwrap();
957        assert!(result.subject.contains("测试主题"));
958    }
959
960    #[test]
961    fn test_analyze_email_html() {
962        let email_data = b"From: sender@example.com\r\n\
963To: receiver@example.com\r\n\
964Subject: HTML Test\r\n\
965Content-Type: text/html\r\n\
966Date: 01 Jan 2024 12:00:00 +0000\r\n\
967\r\n\
968<html><body><h1>Hello</h1></body></html>"
969            .to_vec();
970
971        let result = AnalyzeEmails::new(email_data, false).unwrap();
972        assert_eq!(result.content_type, "text/html");
973        assert!(result.body_html.contains("<h1>Hello</h1>"));
974    }
975
976    #[test]
977    fn test_analyze_email_invalid_format() {
978        let invalid_data = b"This is not a valid email".to_vec();
979        let result = AnalyzeEmails::new(invalid_data, false);
980        assert!(result.is_err());
981    }
982
983    #[test]
984    fn test_from_parsing() {
985        let mut email = AnalyzeEmails::default();
986
987        let result = email.from(r#""John Doe" <john@example.com>"#);
988        assert_eq!(result.get("john@example.com").unwrap(), "John Doe");
989
990        let result = email.from(r#"<simple@example.com>"#);
991        assert_eq!(
992            result.get("simple@example.com").unwrap(),
993            "simple@example.com"
994        );
995    }
996
997    #[test]
998    fn test_email_encoded_parsing() {
999        let mut email = AnalyzeEmails::default();
1000
1001        let result = email.email_encoded(r#"<a@test.com>, <b@test.com>"#);
1002        assert!(result.contains_key("a@test.com"));
1003        assert!(result.contains_key("b@test.com"));
1004    }
1005
1006    #[test]
1007    fn test_analyze_email_with_cc() {
1008        let email_data = b"From: sender@example.com\r\n\
1009To: receiver@example.com\r\n\
1010Cc: cc1@example.com, cc2@example.com\r\n\
1011Subject: CC Test\r\n\
1012Content-Type: text/plain\r\n\
1013Date: 01 Jan 2024 12:00:00 +0000\r\n\
1014\r\n\
1015Test body"
1016            .to_vec();
1017
1018        let result = AnalyzeEmails::new(email_data, false).unwrap();
1019        assert!(result.cc.contains_key("cc1@example.com"));
1020        assert!(result.cc.contains_key("cc2@example.com"));
1021    }
1022
1023    #[test]
1024    fn test_content_transfer_encoding_decode_binary() {
1025        let mut enc = ContentTransferEncoding::Binary;
1026        let data = vec![0x00, 0x01, 0x02, 0xFF, 0xFE];
1027        let result = enc.decode(data.clone()).unwrap();
1028        assert_eq!(result, data);
1029    }
1030
1031    #[test]
1032    fn test_content_transfer_encoding_decode_none() {
1033        let mut enc = ContentTransferEncoding::None;
1034        let data = b"raw data".to_vec();
1035        let result = enc.decode(data.clone()).unwrap();
1036        assert_eq!(result, data);
1037    }
1038
1039    #[test]
1040    fn test_analyze_email_with_reply_to() {
1041        let email_data = b"From: sender@example.com\r\n\
1042To: receiver@example.com\r\n\
1043Reply-To: reply@example.com\r\n\
1044Subject: Reply-To Test\r\n\
1045Content-Type: text/plain\r\n\
1046Date: 01 Jan 2024 12:00:00 +0000\r\n\
1047\r\n\
1048Test body"
1049            .to_vec();
1050
1051        let result = AnalyzeEmails::new(email_data, false).unwrap();
1052        assert!(result.replyto.contains_key("reply@example.com"));
1053    }
1054
1055    #[test]
1056    fn test_analyze_email_with_sender() {
1057        let email_data = b"From: sender@example.com\r\n\
1058Sender: actual-sender@example.com\r\n\
1059To: receiver@example.com\r\n\
1060Subject: Sender Test\r\n\
1061Content-Type: text/plain\r\n\
1062Date: 01 Jan 2024 12:00:00 +0000\r\n\
1063\r\n\
1064Test body"
1065            .to_vec();
1066
1067        let result = AnalyzeEmails::new(email_data, false).unwrap();
1068        assert_eq!(result.sender, "actual-sender@example.com");
1069    }
1070
1071    #[test]
1072    fn test_analyze_email_with_mion() {
1073        let email_data = b"From: sender@example.com\r\n\
1074To: receiver@example.com\r\n\
1075MIME-Version: 1.0\r\n\
1076Subject: MIME Test\r\n\
1077Content-Type: text/plain\r\n\
1078Date: 01 Jan 2024 12:00:00 +0000\r\n\
1079\r\n\
1080Test body"
1081            .to_vec();
1082
1083        let result = AnalyzeEmails::new(email_data, false).unwrap();
1084        assert_eq!(result.mime_version, "1.0");
1085    }
1086
1087    #[test]
1088    fn test_analyze_email_lf_only() {
1089        let email_data = b"From: sender@example.com\n\
1090To: receiver@example.com\n\
1091Subject: LF Only Test\n\
1092Content-Type: text/plain\n\
1093Date: 01 Jan 2024 12:00:00 +0000\n\
1094\n\
1095Test body with LF only"
1096            .to_vec();
1097
1098        let result = AnalyzeEmails::new(email_data, false).unwrap();
1099        assert_eq!(result.subject, "LF Only Test");
1100    }
1101
1102    #[test]
1103    fn test_analyze_email_with_custom_header() {
1104        let email_data = b"From: sender@example.com\r\n\
1105To: receiver@example.com\r\n\
1106X-Custom-Header: custom-value\r\n\
1107Subject: Custom Header Test\r\n\
1108Content-Type: text/plain\r\n\
1109Date: 01 Jan 2024 12:00:00 +0000\r\n\
1110\r\n\
1111Test body"
1112            .to_vec();
1113
1114        let result = AnalyzeEmails::new(email_data, false).unwrap();
1115        assert_eq!(
1116            result.header.get("x-custom-header").unwrap(),
1117            "custom-value"
1118        );
1119    }
1120
1121    #[test]
1122    fn test_analyze_email_base64_body() {
1123        let email_data = b"From: sender@example.com\r\n\
1124To: receiver@example.com\r\n\
1125Subject: Base64 Test\r\n\
1126Content-Type: text/plain\r\n\
1127Content-Transfer-Encoding: base64\r\n\
1128Date: 01 Jan 2024 12:00:00 +0000\r\n\
1129\r\n\
1130SGVsbG8gV29ybGQ="
1131            .to_vec();
1132
1133        let result = AnalyzeEmails::new(email_data, false).unwrap();
1134        assert_eq!(result.body_text, "Hello World");
1135    }
1136
1137    #[test]
1138    fn test_from_parsing_simple_email() {
1139        let mut email = AnalyzeEmails::default();
1140        let result = email.from("user@example.com");
1141        assert!(result.contains_key("user@example.com"));
1142    }
1143
1144    #[test]
1145    fn test_email_encoded_with_name() {
1146        let mut email = AnalyzeEmails::default();
1147        let result = email.email_encoded(r#"John Doe <john@example.com>"#);
1148        assert_eq!(result.get("john@example.com").unwrap(), "John Doe");
1149    }
1150
1151    #[test]
1152    fn test_analyze_invalid_email_debug_writes_eml() {
1153        let invalid_data = b"invalid-email-without-separator".to_vec();
1154        let md5 = br_crypto::md5::encrypt_hex(&invalid_data);
1155        let path = env::current_dir().unwrap().join(format!("xygs-{md5}.eml"));
1156        let _ = fs::remove_file(&path);
1157
1158        let result = AnalyzeEmails::new(invalid_data.clone(), true);
1159        assert!(result.is_err());
1160        assert!(path.exists());
1161        assert_eq!(fs::read(&path).unwrap(), invalid_data);
1162
1163        let _ = fs::remove_file(path);
1164    }
1165
1166    #[test]
1167    fn test_header_colon_only_and_empty_value_skip() {
1168        let email_data = b"From: sender@example.com\r\n\
1169To: receiver@example.com\r\n\
1170Subject:Colon Header\r\n\
1171Content-Type:text/plain;charset=\"utf-8\"\r\n\
1172X-No-Space:value-without-space\r\n\
1173X-Empty:\r\n\
1174Date:Mon, 01 Jan 2024 12:00:00 GMT\r\n\
1175\r\n\
1176Body"
1177            .to_vec();
1178
1179        let result = AnalyzeEmails::new(email_data, false).unwrap();
1180        assert_eq!(result.subject, "Colon Header");
1181        assert_eq!(result.charset, "utf-8");
1182        assert_eq!(
1183            result.header.get("x-no-space").unwrap(),
1184            "value-without-space"
1185        );
1186        assert!(!result.header.contains_key("x-empty"));
1187    }
1188
1189    #[test]
1190    fn test_header_line_without_any_colon_is_skipped() {
1191        let email_data = b"From: sender@example.com\r\n\
1192To: receiver@example.com\r\n\
1193no-colon-line-here\r\n\
1194Subject: Test\r\n\
1195Content-Type: text/plain\r\n\
1196Date: 01 Jan 2024 12:00:00 +0000\r\n\
1197\r\n\
1198body"
1199            .to_vec();
1200
1201        let result = AnalyzeEmails::new(email_data, false).unwrap();
1202        assert_eq!(result.subject, "Test");
1203    }
1204
1205    #[test]
1206    fn test_multipart_body_parsing_for_all_supported_types() {
1207        let content_types = [
1208            "multipart/mixed",
1209            "multipart/alternative",
1210            "multipart/related",
1211            "multipart/report",
1212        ];
1213
1214        for content_type in content_types {
1215            let boundary = unique_token("boundary");
1216            let email_data = multipart_email(
1217                content_type,
1218                boundary.as_str(),
1219                "Content-Type: text/plain\r\nContent-Transfer-Encoding: 7bit\r\n\r\nHello multipart body",
1220            );
1221            let result = AnalyzeEmails::new(email_data, false).unwrap();
1222
1223            assert_eq!(result.content_type, content_type);
1224            assert_eq!(result.charset, "utf-8");
1225            assert!(result.body_text.contains("Hello multipart body"));
1226        }
1227    }
1228
1229    #[test]
1230    fn test_parts_header_parse_failure_debug_writes_file() {
1231        let mut email = AnalyzeEmails::default();
1232        email.debug = true;
1233        email.md5 = unique_token("head");
1234        email.files = object! {};
1235
1236        let path = env::current_dir()
1237            .unwrap()
1238            .join(format!("head-{}.eml", email.md5));
1239        let _ = fs::remove_file(&path);
1240
1241        let result = email.parts(
1242            "invalid-part-content".to_string(),
1243            "raw-email-data".to_string(),
1244        );
1245        assert!(result.is_err());
1246        assert!(path.exists());
1247        assert_eq!(fs::read_to_string(&path).unwrap(), "raw-email-data");
1248
1249        let _ = fs::remove_file(path);
1250    }
1251
1252    #[test]
1253    fn test_parts_unknown_header_is_ignored() {
1254        let mut email = AnalyzeEmails::default();
1255        email.charset = "utf-8".to_string();
1256        email.files = object! {};
1257
1258        let result = email.parts(
1259            "X-Unknown: value\r\nContent-Type: text/plain\r\n\r\nbody".to_string(),
1260            "raw".to_string(),
1261        );
1262        assert!(result.is_ok());
1263        assert_eq!(email.body_text, "body");
1264    }
1265
1266    #[test]
1267    fn test_parts_text_plain_with_name_as_attachment() {
1268        let mut email = AnalyzeEmails::default();
1269        email.charset = "utf-8".to_string();
1270        email.files = object! {};
1271
1272        let filename = format!("{}.txt", unique_token("plain-attachment"));
1273        let part = format!(
1274            "Content-Type:text/plain; name=\"{filename}\"\r\nContent-Transfer-Encoding:base64\r\n\r\nSGVsbG8gQXR0YWNobWVudA=="
1275        );
1276
1277        email.parts(part, "raw".to_string()).unwrap();
1278
1279        let body = b"Hello Attachment".to_vec();
1280        let md5 = br_crypto::md5::encrypt_hex(&body);
1281        let entry = &email.files[md5.as_str()];
1282
1283        assert_eq!(entry["name"].as_str().unwrap(), filename);
1284        assert_eq!(entry["content-type"].as_str().unwrap(), "txt");
1285        let path = entry["file"].as_str().unwrap();
1286        assert_eq!(fs::read(path).unwrap(), body);
1287
1288        let _ = fs::remove_file(path);
1289    }
1290
1291    #[test]
1292    fn test_parts_text_html_with_name_as_attachment() {
1293        let mut email = AnalyzeEmails::default();
1294        email.charset = "utf-8".to_string();
1295        email.files = object! {};
1296
1297        let filename = format!("{}.html", unique_token("html-attachment"));
1298        let part = format!(
1299            "Content-Type: text/html; name=\"{filename}\"\r\nContent-Transfer-Encoding: base64\r\n\r\nPGgxPkhlbGxvIEhUTUwgQXR0YWNobWVudDwvaDE+"
1300        );
1301
1302        email.parts(part, "raw".to_string()).unwrap();
1303
1304        let body = b"<h1>Hello HTML Attachment</h1>".to_vec();
1305        let md5 = br_crypto::md5::encrypt_hex(&body);
1306        let entry = &email.files[md5.as_str()];
1307
1308        assert_eq!(entry["name"].as_str().unwrap(), filename);
1309        assert_eq!(entry["content-type"].as_str().unwrap(), "html");
1310        let path = entry["file"].as_str().unwrap();
1311        assert_eq!(fs::read(path).unwrap(), body);
1312
1313        let _ = fs::remove_file(path);
1314    }
1315
1316    #[test]
1317    fn test_parts_content_disposition_filename() {
1318        let mut email = AnalyzeEmails::default();
1319        email.charset = "utf-8".to_string();
1320        email.files = object! {};
1321
1322        let filename = format!("{}.pdf", unique_token("filename"));
1323        let part = format!(
1324            "Content-Type: application/pdf\r\nContent-Transfer-Encoding: base64\r\nContent-Disposition: attachment; filename=\"{filename}\"\r\n\r\nSGVsbG8gUERG"
1325        );
1326
1327        email.parts(part, "raw".to_string()).unwrap();
1328
1329        let body = b"Hello PDF".to_vec();
1330        let md5 = br_crypto::md5::encrypt_hex(&body);
1331        let entry = &email.files[md5.as_str()];
1332
1333        assert_eq!(entry["name"].as_str().unwrap(), filename);
1334        assert_eq!(entry["content-type"].as_str().unwrap(), "application/pdf");
1335        let path = entry["file"].as_str().unwrap();
1336        assert_eq!(fs::read(path).unwrap(), body);
1337
1338        let _ = fs::remove_file(path);
1339    }
1340
1341    #[test]
1342    fn test_parts_content_disposition_filename_utf8_star() {
1343        let mut email = AnalyzeEmails::default();
1344        email.charset = "utf-8".to_string();
1345        email.files = object! {};
1346
1347        let part = "Content-Type: application/octet-stream\r\n\
1348Content-Transfer-Encoding: base64\r\n\
1349Content-Disposition: attachment; filename*=utf-8''hello%20world.txt\r\n\
1350\r\n\
1351SGVsbG8gVVJMIEZpbGU="
1352            .to_string();
1353
1354        email.parts(part, "raw".to_string()).unwrap();
1355
1356        let body = b"Hello URL File".to_vec();
1357        let md5 = br_crypto::md5::encrypt_hex(&body);
1358        let entry = &email.files[md5.as_str()];
1359
1360        assert_eq!(entry["name"].as_str().unwrap(), "hello world.txt");
1361        assert_eq!(
1362            entry["content-type"].as_str().unwrap(),
1363            "application/octet-stream"
1364        );
1365        let path = entry["file"].as_str().unwrap();
1366        assert_eq!(fs::read(path).unwrap(), body);
1367
1368        let _ = fs::remove_file(path);
1369    }
1370
1371    #[test]
1372    fn test_parts_nested_multipart() {
1373        let mut email = AnalyzeEmails::default();
1374        email.charset = "utf-8".to_string();
1375        email.files = object! {};
1376
1377        let boundary = unique_token("inner-boundary");
1378        let part = format!(
1379            "Content-Type: multipart/alternative; boundary=\"{boundary}\"\r\n\
1380Content-Transfer-Encoding: 7bit\r\n\
1381\r\n\
1382--{boundary}\r\n\
1383Content-Type: text/plain\r\n\
1384Content-Transfer-Encoding: 7bit\r\n\
1385\r\n\
1386Nested text body\r\n\
1387--{boundary}--\r\n"
1388        );
1389
1390        email.parts(part, "raw".to_string()).unwrap();
1391        assert!(email.body_text.contains("Nested text body"));
1392    }
1393
1394    #[test]
1395    fn test_parts_text_calendar_is_skipped() {
1396        let mut email = AnalyzeEmails::default();
1397        email.charset = "utf-8".to_string();
1398        email.files = object! {};
1399        email.body_text = "keep-me".to_string();
1400
1401        let part = "Content-Type: text/calendar\r\n\
1402Content-Transfer-Encoding: 7bit\r\n\
1403\r\n\
1404BEGIN:VCALENDAR"
1405            .to_string();
1406
1407        email.parts(part, "raw".to_string()).unwrap();
1408        assert_eq!(email.body_text, "keep-me");
1409    }
1410
1411    #[test]
1412    fn test_parts_application_content_types_are_saved() {
1413        let mut email = AnalyzeEmails::default();
1414        email.charset = "utf-8".to_string();
1415        email.files = object! {};
1416
1417        let content_types = [
1418            "application/octet-stream",
1419            "application/zip",
1420            "application/pdf",
1421            "image/jpeg",
1422            "image/png",
1423            "image/gif",
1424            "application/ics",
1425            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
1426            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
1427            "application/vnd.ms-excel",
1428        ];
1429        let body = b"Hello File".to_vec();
1430        let md5 = br_crypto::md5::encrypt_hex(&body);
1431
1432        for (idx, content_type) in content_types.iter().enumerate() {
1433            let filename = format!("{}-{idx}.bin", unique_token("app-attachment"));
1434            let part = format!(
1435                "Content-Type: {content_type}; name=\"{filename}\"\r\nContent-Transfer-Encoding: base64\r\n\r\nSGVsbG8gRmlsZQ=="
1436            );
1437
1438            email.parts(part, "raw".to_string()).unwrap();
1439
1440            let entry = &email.files[md5.as_str()];
1441            assert_eq!(entry["name"].as_str().unwrap(), filename);
1442            assert_eq!(entry["content-type"].as_str().unwrap(), *content_type);
1443
1444            let path = entry["file"].as_str().unwrap();
1445            assert_eq!(fs::read(path).unwrap(), body);
1446            let _ = fs::remove_file(path);
1447        }
1448    }
1449
1450    #[test]
1451    fn test_parts_unknown_content_type_debug_writes_file() {
1452        let mut email = AnalyzeEmails::default();
1453        email.debug = true;
1454        email.md5 = unique_token("content-type");
1455        email.charset = "utf-8".to_string();
1456        email.files = object! {};
1457
1458        let path = env::current_dir()
1459            .unwrap()
1460            .join(format!("content_type-{}.eml", email.md5));
1461        let _ = fs::remove_file(&path);
1462
1463        let part = "Content-Type: application/x-custom\r\n\
1464Content-Transfer-Encoding: 7bit\r\n\
1465\r\n\
1466custom body"
1467            .to_string();
1468
1469        email
1470            .parts(part, "raw-unknown-content-type".to_string())
1471            .unwrap();
1472        assert!(path.exists());
1473        assert_eq!(
1474            fs::read_to_string(&path).unwrap(),
1475            "raw-unknown-content-type"
1476        );
1477
1478        let _ = fs::remove_file(path);
1479    }
1480
1481    #[test]
1482    fn test_encoded_line_paths() {
1483        let mut email = AnalyzeEmails::default();
1484
1485        assert_eq!(email.encoded_line("plain text"), "plain text");
1486        assert_eq!(email.encoded_line("=?UTF-8?Q?=48=65=6C=6C=6F?="), "Hello");
1487        assert_eq!(
1488            email.encoded_line("=?UTF-8?X?UnknownEncoding?="),
1489            "UnknownEncoding"
1490        );
1491    }
1492
1493    #[test]
1494    fn test_datetime_parsing_variants_and_error() {
1495        let mut email = AnalyzeEmails::default();
1496
1497        email
1498            .datetime("Mon, 01 Jan 2024 12:00:00 GMT (UTC)")
1499            .unwrap();
1500        assert!(email.timestamp > 0);
1501        assert!(!email.datetime.is_empty());
1502
1503        let err = email.datetime("invalid datetime format").unwrap_err();
1504        assert!(err.to_string().contains("时间解析失败"));
1505    }
1506
1507
1508    #[test]
1509    fn test_datetime_us_style_month_first() {
1510        let mut email = AnalyzeEmails::default();
1511        // 美式日期格式: Mon dd YYYY (如 "Nov 4 2025 22:19:26 +0800")
1512        email
1513            .datetime("Mon, Nov 4 2025 22:19:26 +0800")
1514            .unwrap();
1515        assert!(email.timestamp > 0);
1516        assert!(!email.datetime.is_empty());
1517        // 标准格式仍然正常工作
1518        email
1519            .datetime("Tue, 15 Oct 2024 08:30:00 +0000")
1520            .unwrap();
1521        assert!(email.timestamp > 0);
1522    }
1523    #[test]
1524    fn test_email_encoded_with_encoded_and_quoted_names() {
1525        let mut email = AnalyzeEmails::default();
1526        let result = email.email_encoded(
1527            "\"Quoted User\" <quoted@example.com>, =?UTF-8?B?5rWL6K+V?= <encoded@example.com>",
1528        );
1529
1530        assert_eq!(result.get("quoted@example.com").unwrap(), "Quoted User");
1531        assert_eq!(result.get("encoded@example.com").unwrap(), "测试");
1532    }
1533
1534    #[test]
1535    fn test_set_files_base64_decodes_and_detects_extension() {
1536        let mut email = AnalyzeEmails::default();
1537        email.files = object! {};
1538
1539        let filename = format!("{}.txt", unique_token("set-files"));
1540        email
1541            .set_files(
1542                ContentTransferEncoding::Base64,
1543                "c2V0IGZpbGVzIGJvZHk=\r\n",
1544                filename.as_str(),
1545                "".to_string(),
1546            )
1547            .unwrap();
1548
1549        let body = b"set files body".to_vec();
1550        let md5 = br_crypto::md5::encrypt_hex(&body);
1551        let entry = &email.files[md5.as_str()];
1552
1553        assert_eq!(entry["name"].as_str().unwrap(), filename);
1554        assert_eq!(entry["content-type"].as_str().unwrap(), "txt");
1555        let path = entry["file"].as_str().unwrap();
1556        assert_eq!(fs::read(path).unwrap(), body);
1557
1558        let _ = fs::remove_file(path);
1559    }
1560
1561    #[test]
1562    fn test_header_parses_colon_without_space_separator() {
1563        let email_data = b"From:sender@example.com\r\n\
1564To:receiver@example.com\r\n\
1565X-Test:nospaceval\r\n\
1566Content-Type:text/plain\r\n\
1567Date: 01 Jan 2024 12:00:00 +0000\r\n\
1568\r\n\
1569body"
1570            .to_vec();
1571
1572        let result = AnalyzeEmails::new(email_data, false).unwrap();
1573        assert_eq!(result.header.get("x-test").unwrap(), "nospaceval");
1574    }
1575
1576    #[test]
1577    fn test_multipart_header_without_boundary_keeps_boundary_empty() {
1578        let email_data = b"From: sender@example.com\r\n\
1579To: receiver@example.com\r\n\
1580Subject: Multipart Without Boundary\r\n\
1581Content-Type: multipart/mixed; charset=utf-8\r\n\
1582Date: 01 Jan 2024 12:00:00 +0000\r\n\
1583\r\n\
1584body without multipart markers"
1585            .to_vec();
1586
1587        let result = AnalyzeEmails::new(email_data, false).unwrap();
1588        assert_eq!(result.content_type, "multipart/mixed");
1589        assert!(result.boundary.is_empty());
1590    }
1591
1592    #[test]
1593    fn test_body_multipart_boundary_not_found_uses_original_body() {
1594        let boundary = unique_token("missing-boundary");
1595        let email_data = format!(
1596            "From: sender@example.com\r\n\
1597To: receiver@example.com\r\n\
1598Subject: Boundary Missing In Body\r\n\
1599Content-Type: multipart/mixed;boundary=\"{boundary}\";charset=\"utf-8\"\r\n\
1600Content-Transfer-Encoding: 7bit\r\n\
1601Date: 01 Jan 2024 12:00:00 +0000\r\n\
1602\r\n\
1603this body intentionally has no boundary lines"
1604        )
1605        .into_bytes();
1606
1607        let result = AnalyzeEmails::new(email_data, false).unwrap();
1608        assert_eq!(result.content_type, "multipart/mixed");
1609        assert_eq!(result.boundary, boundary);
1610        assert!(result.body_text.is_empty());
1611        assert!(result.body_html.is_empty());
1612    }
1613
1614    #[test]
1615    fn test_body_multipart_skips_empty_part_segments() {
1616        let boundary = unique_token("empty-part");
1617        let email_data = format!(
1618            "From: sender@example.com\r\n\
1619To: receiver@example.com\r\n\
1620Subject: Empty Multipart Segment\r\n\
1621Content-Type: multipart/mixed; boundary=\"{boundary}\"\r\n\
1622Date: 01 Jan 2024 12:00:00 +0000\r\n\
1623\r\n\
1624--{boundary}\r\n\
1625Content-Type: text/plain\r\n\
1626Content-Transfer-Encoding: 7bit\r\n\
1627\r\n\
1628first text\r\n\
1629--{boundary}\r\n\
1630\r\n\
1631--{boundary}--\r\n"
1632        )
1633        .into_bytes();
1634
1635        let result = AnalyzeEmails::new(email_data, false).unwrap();
1636        assert_eq!(result.content_type, "multipart/mixed");
1637        assert!(result.body_text.contains("first text"));
1638    }
1639
1640    #[test]
1641    fn test_body_unknown_content_type_skips_gracefully() {
1642        let email_data = b"From: a@b.com\r\n\
1643Content-Type: application/json\r\n\
1644Date: 01 Jan 2024 12:00:00 +0000\r\n\
1645\r\n\
1646{\"key\":\"value\"}"
1647            .to_vec();
1648        let result = AnalyzeEmails::new(email_data, false);
1649        assert!(result.is_ok());
1650        let email = result.unwrap();
1651        assert!(email.body_text.is_empty());
1652        assert!(email.body_html.is_empty());
1653    }
1654
1655    #[test]
1656    fn test_parts_ignores_header_line_without_colon() {
1657        let mut email = AnalyzeEmails::default();
1658        email.charset = "utf-8".to_string();
1659        email.files = object! {};
1660
1661        let part = "NoColonHeader\r\n\
1662Content-Type: text/plain\r\n\
1663Content-Transfer-Encoding: 7bit\r\n\
1664\r\n\
1665plain body"
1666            .to_string();
1667
1668        email.parts(part, "raw".to_string()).unwrap();
1669        assert_eq!(email.body_text, "plain body");
1670    }
1671
1672    #[test]
1673    fn test_parts_content_type_boundary_with_semicolon_suffix() {
1674        let mut email = AnalyzeEmails::default();
1675        email.charset = "utf-8".to_string();
1676        email.files = object! {};
1677
1678        let boundary = unique_token("inner-semi");
1679        let part = format!(
1680            "Content-Type: multipart/alternative; boundary=\"{boundary}\"; charset=\"utf-8\"\r\n\
1681Content-Transfer-Encoding: 7bit\r\n\
1682\r\n\
1683--{boundary}\r\n\
1684Content-Type: text/plain\r\n\
1685Content-Transfer-Encoding: 7bit\r\n\
1686\r\n\
1687nested plain body\r\n\
1688--{boundary}--\r\n"
1689        );
1690
1691        email.parts(part, "raw".to_string()).unwrap();
1692        assert!(email.body_text.contains("nested plain body"));
1693    }
1694
1695    #[test]
1696    fn test_parts_text_html_without_filename_sets_body_html() {
1697        let mut email = AnalyzeEmails::default();
1698        email.charset = "utf-8".to_string();
1699        email.files = object! {};
1700
1701        let part = "Content-Type: text/html\r\n\
1702Content-Transfer-Encoding: 7bit\r\n\
1703\r\n\
1704<p>inline html body</p>"
1705            .to_string();
1706
1707        email.parts(part, "raw".to_string()).unwrap();
1708        assert!(email.body_html.contains("<p>inline html body</p>"));
1709    }
1710
1711    #[test]
1712    fn test_parts_nested_multipart_outer_boundary_not_found_skips_empty_part() {
1713        let mut email = AnalyzeEmails::default();
1714        email.charset = "utf-8".to_string();
1715        email.files = object! {};
1716        email.boundary = unique_token("outer-boundary");
1717
1718        let inner_boundary = unique_token("inner-boundary");
1719        let part = format!(
1720            "Content-Type: multipart/alternative; boundary=\"{inner_boundary}\"\r\n\
1721Content-Transfer-Encoding: 7bit\r\n\
1722\r\n\
1723--{inner_boundary}\r\n\
1724Content-Type: text/plain\r\n\
1725Content-Transfer-Encoding: 7bit\r\n\
1726\r\n\
1727nested plain text\r\n\
1728--{inner_boundary}\r\n\
1729\r\n\
1730--{inner_boundary}--\r\n"
1731        );
1732
1733        email.parts(part, "raw".to_string()).unwrap();
1734        assert!(email.body_text.contains("nested plain text"));
1735    }
1736
1737    #[test]
1738    fn test_set_files_returns_error_when_create_fails() {
1739        let mut email = AnalyzeEmails::default();
1740        email.files = object! {};
1741
1742        let filename = "test\0file.txt";
1743
1744        let err = email
1745            .set_files(
1746                ContentTransferEncoding::Bit7,
1747                "ignored",
1748                filename,
1749                "application/octet-stream".to_string(),
1750            )
1751            .unwrap_err();
1752
1753        assert!(err.to_string().contains("打开(创建)临时文件"));
1754    }
1755
1756    #[test]
1757    fn test_content_transfer_encoding_decode_quoted_printable() {
1758        let mut enc = ContentTransferEncoding::QuotedPrintable;
1759        let data = b"Hello=20World=21".to_vec();
1760        let result = enc.decode(data).unwrap();
1761        assert_eq!(result, b"Hello World!");
1762    }
1763}