br_email/
analyze.rs

1use br_crypto::encoding::code_to_utf8;
2use chrono::{DateTime, Local, TimeZone};
3use json::{object, JsonValue};
4use regex::Regex;
5use std::collections::HashMap;
6use std::ffi::OsStr;
7use std::io::{Error, Write};
8use std::{env, fs, io};
9
/// Upper bound for the `depth` counter checked by `AnalyzeEmails::parts`;
/// once the counter reaches this value further parts are skipped with a warning.
const MAX_DEPTH: usize = 10;
11
/// Parsed e-mail message.
#[derive(Debug)]
pub struct AnalyzeEmails {
    /// When set, messages that fail to parse are dumped to files in the current directory.
    pub debug: bool,
    /// Headers without dedicated handling, keyed by lower-cased header name.
    pub header: HashMap<String, String>,
    /// Value of the `MIME-Version` header.
    pub mime_version: String,
    /// Multipart boundary taken from the top-level `Content-Type` header.
    boundary: String,
    /// Hex MD5 digest of the raw message bytes.
    pub md5: String,
    /// Length of the raw message in bytes.
    pub size: usize,
    /// Timestamp parsed from the `Date` header.
    pub timestamp: i64,
    /// Local time, formatted as `%Y-%m-%d %H:%M:%S`.
    pub datetime: String,
    /// Subject.
    pub subject: String,
    /// Sender (`From`), as address -> display name.
    pub from: HashMap<String, String>,
    /// Recipients (`To`), as address -> display name.
    pub to: HashMap<String, String>,
    /// Carbon-copy recipients (`Cc`), as address -> display name.
    pub cc: HashMap<String, String>,
    /// Address the recipient should use when replying (`Reply-To`).
    pub replyto: HashMap<String, String>,
    /// Content type.
    pub content_type: String,
    /// Transfer encoding.
    pub content_transfer_encoding: ContentTransferEncoding,
    /// Actual sender (`Sender` header).
    pub sender: String,
    /// Decoded `text/plain` body.
    pub body_text: String,
    /// Decoded `text/html` body.
    pub body_html: String,
    /// Extracted attachments: a JSON object keyed by the attachment's MD5.
    pub files: JsonValue,
    /// Character set from the top-level `Content-Type` header (`init` defaults it to `utf-8`).
    pub charset: String,
    /// Top-level attachment file name (from Content-Type `name=` or Content-Disposition `filename=`).
    content_filename: String,
    /// Recursion depth (guards against stack overflow from maliciously nested messages).
    depth: usize,
    /// Attachment temp directory (defaults to `env::temp_dir()`; customisable via `new_with_temp_dir`).
    temp_dir: Option<std::path::PathBuf>,
}
52
53impl AnalyzeEmails {
54    pub fn new(data: Vec<u8>, debug: bool) -> io::Result<AnalyzeEmails> {
55        Self::init(data, debug, None)
56    }
57
58    /// Create with custom temp directory for attachment extraction.
59    /// Uses md5-prefixed filenames to avoid collisions between parallel parses.
60    pub fn new_with_temp_dir(
61        data: Vec<u8>,
62        debug: bool,
63        temp_dir: std::path::PathBuf,
64    ) -> io::Result<AnalyzeEmails> {
65        Self::init(data, debug, Some(temp_dir))
66    }
67
68    fn init(
69        mut data: Vec<u8>,
70        debug: bool,
71        temp_dir: Option<std::path::PathBuf>,
72    ) -> io::Result<AnalyzeEmails> {
73        let md5 = br_crypto::md5::encrypt_hex(&data.clone()).to_string();
74        let size = data.len();
75        let data_string = String::from_utf8_lossy(&data).to_string();
76        if data_string.contains("\n\n") {
77            let updated_string = data_string.replace("\n", "\r\n");
78            data = updated_string.as_bytes().to_vec();
79        }
80
81        let subsequence = "\r\n\r\n".as_bytes();
82
83        let (header, body) = match data
84            .windows(subsequence.len())
85            .position(|window| window == subsequence)
86        {
87            None => {
88                if debug {
89                    fs::write(
90                        format!(
91                            "{}/xygs-{}.eml",
92                            env::current_dir()
93                                .unwrap_or_default()
94                                .to_str()
95                                .unwrap_or("."),
96                            md5
97                        ),
98                        data.clone(),
99                    )?;
100                }
101                return Err(Error::other(format!("协议格式错误: {md5}")));
102            }
103            Some(e) => (data[..e].to_vec(), data[e + 4..].to_vec()),
104        };
105        let mut that = Self {
106            debug,
107            header: Default::default(),
108            mime_version: "".to_string(),
109            boundary: "".to_string(),
110            md5,
111            size,
112            timestamp: 0,
113            subject: "".to_string(),
114            from: Default::default(),
115            to: Default::default(),
116            cc: Default::default(),
117            replyto: Default::default(),
118            datetime: "".to_string(),
119            content_type: "".to_string(),
120            content_transfer_encoding: ContentTransferEncoding::Bit7,
121            sender: "".to_string(),
122            body_text: "".to_string(),
123            body_html: "".to_string(),
124            files: object! {},
125            charset: "utf-8".to_string(),
126            content_filename: "".to_string(),
127            depth: 0,
128            temp_dir,
129        };
130        that.header(header)?;
131        if that.content_type.is_empty() {
132            that.content_type = "text/plain".to_string();
133        }
134        that.body(body, data_string)?;
135        Ok(that)
136    }
137
138    fn header(&mut self, data: Vec<u8>) -> io::Result<()> {
139        let data = String::from_utf8_lossy(&data).to_string();
140        let data = data.replace("\r\n\t", "").replace("\r\n ", " ");
141        for item in data.lines() {
142            let (key, value) = match item.find(": ") {
143                Some(e) => (item[..e].to_string(), item[e + 2..].to_string()),
144                None => match item.find(":") {
145                    Some(e) => (item[..e].to_string(), item[e + 1..].to_string()),
146                    None => continue,
147                },
148            };
149            let name = key.to_lowercase();
150            if value.is_empty() {
151                continue;
152            }
153            match key.to_lowercase().as_str() {
154                "mime-version" => self.mime_version = value.to_string(),
155                "from" => {
156                    self.from = self.from(&value);
157                }
158                "sender" => {
159                    self.sender = value.to_string();
160                }
161                "to" => {
162                    self.to = self.email_encoded(&value);
163                }
164                "cc" => {
165                    self.cc = self.email_encoded(&value);
166                }
167                "reply-to" => {
168                    self.replyto = self.email_encoded(&value);
169                }
170                "subject" => {
171                    self.subject = self.subject(value.to_string());
172                }
173                "content-type" => {
174                    let types = value.split(";").collect::<Vec<&str>>();
175                    self.content_type = types[0].trim().to_lowercase().to_string();
176                    match self.content_type.as_str() {
177                        "multipart/mixed"
178                        | "multipart/alternative"
179                        | "multipart/related"
180                        | "multipart/report"
181                        | "multipart/encrypted"
182                        | "multipart/digest" => match types[1].find("boundary=") {
183                            None => {}
184                            Some(e) => {
185                                let boundary = &types[1][e..];
186                                self.boundary = boundary
187                                    .trim()
188                                    .trim_start_matches("boundary=")
189                                    .trim_start_matches("\"")
190                                    .trim_end_matches("\"")
191                                    .to_string();
192                            }
193                        },
194                        _ => {}
195                    }
196                    if types.len() > 1 {
197                        for item in types.iter() {
198                            let trimmed = item.trim();
199                            if trimmed.contains("charset=") {
200                                self.charset = trimmed
201                                    .split("charset=")
202                                    .last()
203                                    .unwrap_or("")
204                                    .trim_start_matches('"')
205                                    .trim_end_matches('"')
206                                    .to_string();
207                            }
208                            if trimmed.starts_with("name=") {
209                                self.content_filename =
210                                    self.encoded(trimmed.trim_start_matches("name="));
211                            }
212                        }
213                    }
214                }
215                "content-transfer-encoding" => {
216                    self.content_transfer_encoding = ContentTransferEncoding::from(&value);
217                }
218                "date" => self.datetime(&value)?,
219                "content-disposition" => {
220                    if self.content_filename.is_empty() && value.contains("filename=") {
221                        self.content_filename = value.split("filename=").collect::<Vec<&str>>()[1]
222                            .trim_start_matches('"')
223                            .trim_end_matches('"')
224                            .to_string();
225                    }
226                    if self.content_filename.is_empty() && value.contains("filename*=utf-8''") {
227                        self.content_filename =
228                            value.split("filename*=utf-8''").collect::<Vec<&str>>()[1]
229                                .trim_start_matches('"')
230                                .trim_end_matches('"')
231                                .to_string();
232                        self.content_filename =
233                            br_crypto::encoding::urlencoding_decode(self.content_filename.as_str());
234                    }
235                }
236                _ => {
237                    self.header
238                        .insert(name.trim().to_string(), value.to_string());
239                }
240            }
241        }
242        Ok(())
243    }
244    fn body(&mut self, data: Vec<u8>, old_data: String) -> io::Result<()> {
245        match self.content_type.to_lowercase().as_str() {
246            "text/html" => {
247                let data = self.content_transfer_encoding.decode(data)?;
248                let res = code_to_utf8(self.charset.as_str(), data.clone());
249                self.body_html = res;
250            }
251            "text/plain" => {
252                let data = self.content_transfer_encoding.decode(data)?;
253                let res = code_to_utf8(self.charset.as_str(), data.clone());
254                self.body_text = res;
255            }
256            "multipart/mixed"
257            | "multipart/alternative"
258            | "multipart/related"
259            | "multipart/report"
260            | "multipart/signed"
261            | "multipart/encrypted"
262            | "multipart/digest" => {
263                let data = self.content_transfer_encoding.decode(data.clone())?;
264                let mut parts = code_to_utf8(self.charset.as_str(), data.clone());
265                let mut parts_list = vec![];
266                let mut text = String::new();
267
268                parts = match parts.find(self.boundary.as_str()) {
269                    None => parts,
270                    Some(e) => parts[e..].to_string(),
271                };
272                for item in parts.lines() {
273                    if item.contains(self.boundary.as_str()) && text.is_empty() {
274                        continue;
275                    }
276                    if item.contains(self.boundary.as_str()) && !text.is_empty() {
277                        parts_list.push(text.clone());
278                        text = String::new();
279                        continue;
280                    }
281                    text.push_str(item);
282                    text.push_str("\r\n");
283                }
284                for part in parts_list {
285                    if part.trim().is_empty() {
286                        continue;
287                    }
288                    self.parts(part.to_string(), old_data.clone())?;
289                }
290            }
291            _ => {
292                if !self.content_filename.is_empty() {
293                    let data_str = String::from_utf8_lossy(&data).to_string();
294                    let encoding = self.content_transfer_encoding.clone();
295                    let filename = self.content_filename.clone();
296                    let ct = self.content_type.clone();
297                    self.set_files(encoding, &data_str, &filename, ct)?;
298                } else {
299                    log::warn!("未知body类型: {}, 已跳过", self.content_type);
300                }
301            }
302        }
303        Ok(())
304    }
305    /// 部分内容处理
306    fn parts(&mut self, data: String, old_data: String) -> io::Result<()> {
307        if self.depth >= MAX_DEPTH {
308            log::warn!("递归深度超过限制 {}, 已跳过", MAX_DEPTH);
309            return Ok(());
310        }
311        self.depth += 1;
312        let (header_str, body) = match data.find("\r\n\r\n") {
313            None => {
314                if self.debug {
315                    fs::write(
316                        format!(
317                            "{}/head-{}.eml",
318                            env::current_dir()
319                                .unwrap_or_default()
320                                .to_str()
321                                .unwrap_or("."),
322                            self.md5
323                        ),
324                        old_data.clone(),
325                    )?;
326                }
327                return Err(Error::other("解析附件头失败"));
328            }
329            Some(e) => (
330                data[..e].replace("\r\n\t", " ").replace("\r\n ", " "),
331                &data[e + 4..],
332            ),
333        };
334
335        let mut filename = "".to_string();
336        let mut content_type = String::new();
337        let mut boundary = String::new();
338        let mut content_transfer_encoding = ContentTransferEncoding::None;
339        for item in header_str.lines() {
340            let (key, value) = match item.find(": ") {
341                Some(e) => (&item[..e], &item[e + 2..]),
342                None => match item.find(":") {
343                    Some(e) => (&item[..e], &item[e + 1..]),
344                    None => continue,
345                },
346            };
347
348            let name = key.to_lowercase();
349
350            match name.trim() {
351                "content-transfer-encoding" => {
352                    content_transfer_encoding = ContentTransferEncoding::from(value)
353                }
354                "content-type" => {
355                    let types = value.trim().split(";").collect::<Vec<&str>>();
356                    content_type = types[0].trim().to_string();
357                    let name = types
358                        .iter()
359                        .filter(|&x| x.trim().starts_with("name="))
360                        .map(|&x| x.trim().to_string())
361                        .collect::<Vec<String>>();
362                    if !name.is_empty() {
363                        let name = name[0].trim_start_matches("name=");
364                        filename = self.encoded(name);
365                    }
366                    match value.find("boundary=") {
367                        None => {}
368                        Some(i) => {
369                            let mut b = &value[i + 9..];
370                            b = match b.find(";") {
371                                None => b,
372                                Some(i) => &b[..i],
373                            };
374                            boundary = b
375                                .trim_start_matches("\"")
376                                .trim_end_matches("\"")
377                                .to_string();
378                        }
379                    }
380                }
381                "content-id"
382                | "content-length"
383                | "mime-version"
384                | "content-description"
385                | "date"
386                | "x-attachment-id"
387                | "x-attachment-content-disposition" => {}
388                "content-disposition" => {
389                    if filename.is_empty() && value.contains("filename=") {
390                        filename = value.split("filename=").collect::<Vec<&str>>()[1]
391                            .trim_start_matches("\"")
392                            .trim_end_matches("\"")
393                            .to_string();
394                    }
395                    if filename.is_empty() && value.contains("filename*=utf-8''") {
396                        filename = value.split("filename*=utf-8''").collect::<Vec<&str>>()[1]
397                            .trim_start_matches("\"")
398                            .trim_end_matches("\"")
399                            .to_string();
400                        filename = br_crypto::encoding::urlencoding_decode(filename.as_str());
401                    }
402                }
403                _ => {
404                    log::debug!("parts 忽略未知 header: {name} [{item}]");
405                }
406            }
407        }
408
409        match content_type.as_str() {
410            "text/plain" => {
411                if filename.is_empty() {
412                    let res = content_transfer_encoding.decode(body.as_bytes().to_vec())?;
413                    let text = code_to_utf8(self.charset.as_str(), res.clone());
414                    self.body_text = text;
415                } else {
416                    self.set_files(
417                        content_transfer_encoding,
418                        body,
419                        filename.as_str(),
420                        "".to_string(),
421                    )?;
422                }
423            }
424            "text/html" | "text/x-amp-html" => {
425                if filename.is_empty() {
426                    let res = content_transfer_encoding.decode(body.as_bytes().to_vec())?;
427                    self.body_html = code_to_utf8(self.charset.as_str(), res.clone());
428                } else {
429                    self.set_files(
430                        content_transfer_encoding,
431                        body,
432                        filename.as_str(),
433                        "".to_string(),
434                    )?;
435                }
436            }
437            "multipart/mixed"
438            | "multipart/alternative"
439            | "multipart/related"
440            | "multipart/report"
441            | "multipart/signed"
442            | "multipart/encrypted"
443            | "multipart/digest" => {
444                let data = self
445                    .content_transfer_encoding
446                    .decode(body.as_bytes().to_vec())?;
447                let mut parts = code_to_utf8(self.charset.as_str(), data.clone());
448
449                parts = match parts.find(boundary.as_str()) {
450                    None => parts,
451                    Some(e) => parts[e..].to_string(),
452                };
453
454                let mut parts_list = vec![];
455                let mut text = String::new();
456                for item in parts.lines() {
457                    if item.contains(&boundary) && text.is_empty() {
458                        continue;
459                    }
460                    if item.contains(&boundary) && !text.is_empty() {
461                        parts_list.push(text);
462                        text = String::new();
463                        continue;
464                    }
465                    text.push_str(item);
466                    text.push_str("\r\n");
467                }
468                for part in parts_list {
469                    if part.trim().is_empty() {
470                        continue;
471                    }
472                    self.parts(part.to_string(), old_data.clone())?;
473                }
474            }
475            "text/calendar" | "message/delivery-status" | "message/disposition-notification" => {}
476            "message/rfc822" => {
477                let data = content_transfer_encoding.decode(body.as_bytes().to_vec())?;
478                let text = code_to_utf8(self.charset.as_str(), data);
479                match AnalyzeEmails::new(text.into_bytes(), self.debug) {
480                    Ok(nested) => {
481                        if self.body_text.is_empty() {
482                            self.body_text = nested.body_text;
483                        }
484                        if self.body_html.is_empty() {
485                            self.body_html = nested.body_html;
486                        }
487                        for (k, v) in nested.files.entries() {
488                            self.files[k] = v.clone();
489                        }
490                    }
491                    Err(e) => {
492                        log::warn!("解析嵌套邮件 message/rfc822 失败: {}", e);
493                    }
494                }
495            }
496            "application/octet-stream"
497            | "application/zip"
498            | "application/pdf"
499            | "image/jpeg"
500            | "image/png"
501            | "image/gif"
502            | "application/ics"
503            | "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
504            | "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
505            | "application/vnd.ms-excel" => {
506                if !filename.is_empty() {
507                    self.set_files(
508                        content_transfer_encoding,
509                        body,
510                        filename.as_str(),
511                        content_type.to_string(),
512                    )?;
513                }
514            }
515            _ => {
516                if !filename.is_empty() {
517                    self.set_files(
518                        content_transfer_encoding,
519                        body,
520                        filename.as_str(),
521                        content_type.to_string(),
522                    )?;
523                } else {
524                    if self.debug {
525                        fs::write(
526                            format!(
527                                "{}/content_type-{}.eml",
528                                env::current_dir()
529                                    .unwrap_or_default()
530                                    .to_str()
531                                    .unwrap_or("."),
532                                self.md5
533                            ),
534                            old_data.clone(),
535                        )?;
536                    }
537                    log::warn!(
538                        "未知 parts content_type 类型: {}, 无文件名已跳过",
539                        content_type
540                    );
541                }
542            }
543        }
544        Ok(())
545    }
546    pub fn from(&mut self, value: &str) -> HashMap<String, String> {
547        let mut r = value
548            .split("<")
549            .filter(|x| !x.trim().is_empty())
550            .map(|x| x.trim())
551            .collect::<Vec<&str>>();
552        if r[0].starts_with("\"") && r[0].ends_with("\"") {
553            r[0] = r[0].trim_start_matches("\"").trim_end_matches("\"").trim();
554        }
555        let mut emails = HashMap::new();
556        if r.len() == 1 {
557            let name = r[0].trim_end_matches(">").to_string();
558            emails.insert(name.clone(), name);
559        } else {
560            let name = self.encoded(r[0].trim());
561            let email = r[1].trim_end_matches(">").to_string();
562            emails.insert(email, name);
563        }
564        emails
565    }
566    fn subject(&mut self, value: String) -> String {
567        let value = value.replace("?==?", "?=\r\n\t=?");
568        if !value.contains("=?") && !value.contains("?=") {
569            return value.to_string();
570        }
571        let list = value.split("\r\n\t").collect::<Vec<&str>>();
572        let mut txt = vec![];
573        for item in list {
574            txt.push(self.encoded(item));
575        }
576        txt.join("")
577    }
578
579    fn encoded(&mut self, value: &str) -> String {
580        let t = value
581            .trim_start_matches("\"")
582            .trim_end_matches("\"")
583            .replace("?==?", "?= =?");
584        if t.contains("=?") && t.contains("?=") {
585            let l = t.split(" ").collect::<Vec<&str>>();
586            let mut txt = vec![];
587            for item in l {
588                txt.push(self.encoded_line(item));
589            }
590            txt.join("")
591        } else {
592            t.to_string()
593        }
594    }
595    /// 段落解码
596    fn encoded_line(&mut self, value: &str) -> String {
597        let line = value.split("?").collect::<Vec<&str>>();
598        if line.len() < 4 {
599            return value.to_string();
600        }
601        let charset = line[1].to_lowercase();
602        let code = line[2].to_uppercase();
603        let data = line[3];
604
605        let strs = match code.as_str() {
606            "B" => br_crypto::base64::decode_u8(data),
607            "Q" => br_crypto::qp::decode(data).unwrap_or(vec![]),
608            _ => data.as_bytes().to_vec(),
609        };
610        let text = code_to_utf8(&charset, strs.clone());
611        text.chars().filter(|&x| x != '\u{200b}').collect()
612    }
613
614    /// 时间处理
615    fn datetime(&mut self, value: &str) -> io::Result<()> {
616        let re =
617            Regex::new(r"\s*\(.*\)$").map_err(|e| Error::other(format!("正则表达式错误: {e}")))?;
618        let datetime = re.replace(value, "").to_string();
619        let datetime = datetime.replace("GMT", "+0000").to_string();
620        let datetime = match datetime.find(",") {
621            None => datetime,
622            Some(i) => datetime[i + 1..].trim().to_string(),
623        };
624        // 归一化月份缩写大小写 ("NOV" -> "Nov", "nov" -> "Nov")
625        let datetime = datetime
626            .split_whitespace()
627            .map(|word| {
628                if word.len() == 3 && word.chars().all(|c| c.is_ascii_alphabetic()) {
629                    let mut chars = word.chars();
630                    match chars.next() {
631                        Some(f) => {
632                            format!("{}{}", f.to_uppercase(), chars.as_str().to_lowercase())
633                        }
634                        None => word.to_string(),
635                    }
636                } else {
637                    word.to_string()
638                }
639            })
640            .collect::<Vec<_>>()
641            .join(" ");
642        // 尝试多种日期格式：标准 RFC 2822 (dd Mon YYYY) 和美式 (Mon dd YYYY)
643        let formats = ["%d %b %Y %H:%M:%S %z", "%b %d %Y %H:%M:%S %z"];
644        let datetime = formats
645            .iter()
646            .find_map(|fmt| DateTime::parse_from_str(datetime.as_str(), fmt).ok())
647            .ok_or_else(|| {
648                Error::other(format!("时间解析失败: 所有格式均不匹配 [{datetime:?}]"))
649            })?;
650
651        self.timestamp = datetime.timestamp();
652        self.datetime = Local
653            .timestamp_opt(self.timestamp, 0)
654            .single()
655            .map(|dt| {
656                dt.with_timezone(&Local)
657                    .format("%Y-%m-%d %H:%M:%S")
658                    .to_string()
659            })
660            .unwrap_or_default();
661        Ok(())
662    }
663    pub fn email_encoded(&mut self, value: &str) -> HashMap<String, String> {
664        let list = value.split(",").map(|x| x.trim()).collect::<Vec<&str>>();
665        let mut emails = HashMap::new();
666        for item in list {
667            let mut r = item.split(" <").collect::<Vec<&str>>();
668            if r[0].starts_with("\"") && r[0].ends_with("\"") {
669                r[0] = r[0].trim_start_matches("\"").trim_end_matches("\"");
670            }
671            if r.len() == 1 {
672                let name = r[0]
673                    .trim_start_matches("<")
674                    .trim_end_matches(">")
675                    .to_string();
676                emails.insert(name.clone(), name);
677            } else {
678                let name = self.encoded(r[0].trim());
679                let email = r[1].trim_end_matches(">").to_string();
680                emails.insert(email, name);
681            }
682        }
683        emails
684    }
685    fn set_files(
686        &mut self,
687        mut content_transfer_encoding: ContentTransferEncoding,
688        body: &str,
689        filename: &str,
690        mut content_type: String,
691    ) -> io::Result<()> {
692        // 文件名安全处理：移除路径穿越字符，只保留文件名部分
693        let filename = filename
694            .replace("\\", "/")
695            .split('/')
696            .next_back()
697            .unwrap_or(filename)
698            .replace("..", "")
699            .trim_start_matches('.')
700            .to_string();
701        let filename = if filename.is_empty() {
702            "unnamed".to_string()
703        } else {
704            filename
705        };
706        let filename = filename.as_str();
707        let mut data_str = String::new();
708        if let ContentTransferEncoding::Base64 = content_transfer_encoding {
709            let mut text = "".to_string();
710            for line in body.lines() {
711                text += line;
712            }
713            data_str = text;
714        }
715
716        let body = content_transfer_encoding.decode(data_str.as_bytes().to_vec())?;
717        let md5 = br_crypto::md5::encrypt_hex(&body.clone());
718        let size = body.len();
719        // Use custom temp_dir if set, otherwise system temp; prefix filename with md5 to avoid collisions
720        let base_dir = self.temp_dir.clone().unwrap_or_else(env::temp_dir);
721        let safe_filename = format!("{md5}_{filename}");
722        let mut file_path = base_dir.clone();
723        file_path.push(&safe_filename);
724        let path_temp_dir = file_path.clone();
725        // Ensure the directory exists (for custom temp_dir)
726        if let Some(parent) = file_path.parent() {
727            let _ = fs::create_dir_all(parent);
728        }
729
730        let mut temp_file = match fs::File::create(file_path.clone()) {
731            Ok(e) => e,
732            Err(e) => {
733                return Err(Error::other(format!(
734                    "打开（创建）临时文件: {e} [{filename}]"
735                )))
736            }
737        };
738
739        if temp_file.write(body.as_slice()).is_ok() {
740            if content_type.is_empty() {
741                content_type = path_temp_dir
742                    .extension()
743                    .unwrap_or(OsStr::new("unknown"))
744                    .to_str()
745                    .unwrap_or("unknown")
746                    .to_string();
747            }
748
749            self.files[md5.as_str()] = object! {
750                name:filename,
751                md5:md5.clone(),
752                size:size,
753                "content-type":content_type.clone(),
754                file:file_path.to_str()
755            };
756        };
757        Ok(())
758    }
759}
760
761impl Default for AnalyzeEmails {
762    fn default() -> Self {
763        Self {
764            debug: false,
765            header: Default::default(),
766            mime_version: "".to_string(),
767            boundary: "".to_string(),
768            md5: "".to_string(),
769            size: 0,
770            timestamp: 0,
771            datetime: "".to_string(),
772            subject: "".to_string(),
773            from: Default::default(),
774            to: Default::default(),
775            cc: Default::default(),
776            replyto: Default::default(),
777            content_type: "".to_string(),
778            content_transfer_encoding: ContentTransferEncoding::None,
779            sender: "".to_string(),
780            body_text: "".to_string(),
781            body_html: "".to_string(),
782            files: JsonValue::Null,
783            charset: "".to_string(),
784            content_filename: "".to_string(),
785            depth: 0,
786            temp_dir: None,
787        }
788    }
789}
790
/// MIME `Content-Transfer-Encoding` values.
///
/// Rules of thumb for which encoding a sender picks:
///
/// * Plain text containing only ASCII characters: usually `7bit`.
/// * Text containing non-ASCII characters: `quoted-printable` or `8bit`,
///   depending on the content and on compatibility requirements.
/// * Binary data (images, video, audio, ...): usually `base64`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ContentTransferEncoding {
    /// Mainly used for text. Most of the text stays human-readable; non-ASCII
    /// bytes and the escape character `=` are written as `=XX` hex escapes.
    /// Suited to text that is mostly ASCII with some non-ASCII characters.
    QuotedPrintable,
    /// Encodes binary data as ASCII using a 64-character alphabet
    /// (`A-Z`, `a-z`, `0-9`, `+`, `/`); every 3 input bytes become 4 characters.
    /// Commonly used for attachments, images, audio and video.
    Base64,
    /// The content is raw binary data that is transmitted as-is, without any
    /// escaping. Requires a transport that can carry every byte value, which
    /// not all systems support.
    Binary,
    /// The content contains 8-bit characters, i.e. it may include non-ASCII
    /// bytes. Suited to non-ASCII text, provided every hop of the transport
    /// supports 8-bit data.
    Bit8,
    /// The content is ASCII text consisting only of 7-bit characters (the top
    /// bit of every byte is 0). The most widely supported encoding; suited to
    /// plain-text mail without special characters or binary data.
    Bit7,
    /// No (or an unrecognized) transfer encoding was declared.
    None,
}
816
817impl ContentTransferEncoding {
818    fn from(value: &str) -> Self {
819        match value.to_lowercase().as_str() {
820            "7bit" => Self::Bit7,
821            "8bit" => Self::Bit8,
822            "binary" => Self::Binary,
823            "base64" => Self::Base64,
824            "quoted-printable" => Self::QuotedPrintable,
825            _ => Self::None,
826        }
827    }
828    fn decode(&mut self, mut data: Vec<u8>) -> io::Result<Vec<u8>> {
829        let res = match self {
830            ContentTransferEncoding::QuotedPrintable => br_crypto::qp::decode(data)?,
831            ContentTransferEncoding::Base64 => {
832                let str = String::from_utf8_lossy(&data).to_string();
833                let mut text = "".to_string();
834                for line in str.lines() {
835                    text += line;
836                }
837                data = text.as_bytes().to_vec();
838                br_crypto::base64::decode_u8(data)
839            }
840            ContentTransferEncoding::Binary => data,
841            ContentTransferEncoding::Bit8 => data,
842            ContentTransferEncoding::Bit7 => data,
843            ContentTransferEncoding::None => data,
844        };
845        Ok(res)
846    }
847}
848
849#[cfg(test)]
850#[allow(clippy::field_reassign_with_default)]
851mod tests {
852    use super::*;
853    use std::time::{SystemTime, UNIX_EPOCH};
854    use std::{env, fs};
855
856    fn unique_token(prefix: &str) -> String {
857        let nanos = SystemTime::now()
858            .duration_since(UNIX_EPOCH)
859            .unwrap()
860            .as_nanos();
861        format!("{prefix}-{nanos}-{}", std::process::id())
862    }
863
864    fn multipart_email(content_type: &str, boundary: &str, part: &str) -> Vec<u8> {
865        format!(
866            "From: sender@example.com\r\n\
867To: receiver@example.com\r\n\
868Subject: Multipart Test\r\n\
869Content-Type: {content_type};boundary=\"{boundary}\";charset=\"utf-8\"\r\n\
870Content-Transfer-Encoding: 7bit\r\n\
871Date: Mon, 01 Jan 2024 12:00:00 GMT (UTC)\r\n\
872\r\n\
873--{boundary}\r\n\
874{part}\r\n\
875--{boundary}--\r\n"
876        )
877        .into_bytes()
878    }
879
880    #[test]
881    fn test_content_transfer_encoding_from() {
882        assert!(matches!(
883            ContentTransferEncoding::from("7bit"),
884            ContentTransferEncoding::Bit7
885        ));
886        assert!(matches!(
887            ContentTransferEncoding::from("8bit"),
888            ContentTransferEncoding::Bit8
889        ));
890        assert!(matches!(
891            ContentTransferEncoding::from("base64"),
892            ContentTransferEncoding::Base64
893        ));
894        assert!(matches!(
895            ContentTransferEncoding::from("BASE64"),
896            ContentTransferEncoding::Base64
897        ));
898        assert!(matches!(
899            ContentTransferEncoding::from("quoted-printable"),
900            ContentTransferEncoding::QuotedPrintable
901        ));
902        assert!(matches!(
903            ContentTransferEncoding::from("binary"),
904            ContentTransferEncoding::Binary
905        ));
906        assert!(matches!(
907            ContentTransferEncoding::from("unknown"),
908            ContentTransferEncoding::None
909        ));
910    }
911
912    #[test]
913    fn test_content_transfer_encoding_decode_7bit() {
914        let mut enc = ContentTransferEncoding::Bit7;
915        let data = b"Hello World".to_vec();
916        let result = enc.decode(data.clone()).unwrap();
917        assert_eq!(result, data);
918    }
919
920    #[test]
921    fn test_content_transfer_encoding_decode_8bit() {
922        let mut enc = ContentTransferEncoding::Bit8;
923        let data = "你好世界".as_bytes().to_vec();
924        let result = enc.decode(data.clone()).unwrap();
925        assert_eq!(result, data);
926    }
927
928    #[test]
929    fn test_content_transfer_encoding_decode_base64() {
930        let mut enc = ContentTransferEncoding::Base64;
931        let data = b"SGVsbG8gV29ybGQ=".to_vec();
932        let result = enc.decode(data).unwrap();
933        assert_eq!(result, b"Hello World");
934    }
935
936    #[test]
937    fn test_analyze_emails_default() {
938        let email = AnalyzeEmails::default();
939        assert!(!email.debug);
940        assert_eq!(email.size, 0);
941        assert_eq!(email.timestamp, 0);
942        assert!(email.subject.is_empty());
943        assert!(email.from.is_empty());
944        assert!(email.to.is_empty());
945    }
946
947    #[test]
948    fn test_analyze_simple_email() {
949        let email_data = b"From: sender@example.com\r\n\
950To: receiver@example.com\r\n\
951Subject: Test Subject\r\n\
952Content-Type: text/plain\r\n\
953Date: 01 Jan 2024 12:00:00 +0000\r\n\
954\r\n\
955Hello, this is a test email body."
956            .to_vec();
957
958        let result = AnalyzeEmails::new(email_data, false).unwrap();
959        assert_eq!(result.subject, "Test Subject");
960        assert_eq!(result.content_type, "text/plain");
961        assert!(result.from.contains_key("sender@example.com"));
962        assert!(result.to.contains_key("receiver@example.com"));
963        assert_eq!(result.body_text, "Hello, this is a test email body.");
964    }
965
966    #[test]
967    fn test_analyze_email_with_encoded_subject() {
968        let email_data = b"From: test@example.com\r\n\
969To: receiver@example.com\r\n\
970Subject: =?UTF-8?B?5rWL6K+V5Li76aKY?=\r\n\
971Content-Type: text/plain\r\n\
972Date: 01 Jan 2024 12:00:00 +0000\r\n\
973\r\n\
974Test body"
975            .to_vec();
976
977        let result = AnalyzeEmails::new(email_data, false).unwrap();
978        assert!(result.subject.contains("测试主题"));
979    }
980
981    #[test]
982    fn test_analyze_email_html() {
983        let email_data = b"From: sender@example.com\r\n\
984To: receiver@example.com\r\n\
985Subject: HTML Test\r\n\
986Content-Type: text/html\r\n\
987Date: 01 Jan 2024 12:00:00 +0000\r\n\
988\r\n\
989<html><body><h1>Hello</h1></body></html>"
990            .to_vec();
991
992        let result = AnalyzeEmails::new(email_data, false).unwrap();
993        assert_eq!(result.content_type, "text/html");
994        assert!(result.body_html.contains("<h1>Hello</h1>"));
995    }
996
997    #[test]
998    fn test_analyze_email_invalid_format() {
999        let invalid_data = b"This is not a valid email".to_vec();
1000        let result = AnalyzeEmails::new(invalid_data, false);
1001        assert!(result.is_err());
1002    }
1003
1004    #[test]
1005    fn test_from_parsing() {
1006        let mut email = AnalyzeEmails::default();
1007
1008        let result = email.from(r#""John Doe" <john@example.com>"#);
1009        assert_eq!(result.get("john@example.com").unwrap(), "John Doe");
1010
1011        let result = email.from(r#"<simple@example.com>"#);
1012        assert_eq!(
1013            result.get("simple@example.com").unwrap(),
1014            "simple@example.com"
1015        );
1016    }
1017
1018    #[test]
1019    fn test_email_encoded_parsing() {
1020        let mut email = AnalyzeEmails::default();
1021
1022        let result = email.email_encoded(r#"<a@test.com>, <b@test.com>"#);
1023        assert!(result.contains_key("a@test.com"));
1024        assert!(result.contains_key("b@test.com"));
1025    }
1026
1027    #[test]
1028    fn test_analyze_email_with_cc() {
1029        let email_data = b"From: sender@example.com\r\n\
1030To: receiver@example.com\r\n\
1031Cc: cc1@example.com, cc2@example.com\r\n\
1032Subject: CC Test\r\n\
1033Content-Type: text/plain\r\n\
1034Date: 01 Jan 2024 12:00:00 +0000\r\n\
1035\r\n\
1036Test body"
1037            .to_vec();
1038
1039        let result = AnalyzeEmails::new(email_data, false).unwrap();
1040        assert!(result.cc.contains_key("cc1@example.com"));
1041        assert!(result.cc.contains_key("cc2@example.com"));
1042    }
1043
1044    #[test]
1045    fn test_content_transfer_encoding_decode_binary() {
1046        let mut enc = ContentTransferEncoding::Binary;
1047        let data = vec![0x00, 0x01, 0x02, 0xFF, 0xFE];
1048        let result = enc.decode(data.clone()).unwrap();
1049        assert_eq!(result, data);
1050    }
1051
1052    #[test]
1053    fn test_content_transfer_encoding_decode_none() {
1054        let mut enc = ContentTransferEncoding::None;
1055        let data = b"raw data".to_vec();
1056        let result = enc.decode(data.clone()).unwrap();
1057        assert_eq!(result, data);
1058    }
1059
1060    #[test]
1061    fn test_analyze_email_with_reply_to() {
1062        let email_data = b"From: sender@example.com\r\n\
1063To: receiver@example.com\r\n\
1064Reply-To: reply@example.com\r\n\
1065Subject: Reply-To Test\r\n\
1066Content-Type: text/plain\r\n\
1067Date: 01 Jan 2024 12:00:00 +0000\r\n\
1068\r\n\
1069Test body"
1070            .to_vec();
1071
1072        let result = AnalyzeEmails::new(email_data, false).unwrap();
1073        assert!(result.replyto.contains_key("reply@example.com"));
1074    }
1075
1076    #[test]
1077    fn test_analyze_email_with_sender() {
1078        let email_data = b"From: sender@example.com\r\n\
1079Sender: actual-sender@example.com\r\n\
1080To: receiver@example.com\r\n\
1081Subject: Sender Test\r\n\
1082Content-Type: text/plain\r\n\
1083Date: 01 Jan 2024 12:00:00 +0000\r\n\
1084\r\n\
1085Test body"
1086            .to_vec();
1087
1088        let result = AnalyzeEmails::new(email_data, false).unwrap();
1089        assert_eq!(result.sender, "actual-sender@example.com");
1090    }
1091
1092    #[test]
1093    fn test_analyze_email_with_mion() {
1094        let email_data = b"From: sender@example.com\r\n\
1095To: receiver@example.com\r\n\
1096MIME-Version: 1.0\r\n\
1097Subject: MIME Test\r\n\
1098Content-Type: text/plain\r\n\
1099Date: 01 Jan 2024 12:00:00 +0000\r\n\
1100\r\n\
1101Test body"
1102            .to_vec();
1103
1104        let result = AnalyzeEmails::new(email_data, false).unwrap();
1105        assert_eq!(result.mime_version, "1.0");
1106    }
1107
1108    #[test]
1109    fn test_analyze_email_lf_only() {
1110        let email_data = b"From: sender@example.com\n\
1111To: receiver@example.com\n\
1112Subject: LF Only Test\n\
1113Content-Type: text/plain\n\
1114Date: 01 Jan 2024 12:00:00 +0000\n\
1115\n\
1116Test body with LF only"
1117            .to_vec();
1118
1119        let result = AnalyzeEmails::new(email_data, false).unwrap();
1120        assert_eq!(result.subject, "LF Only Test");
1121    }
1122
1123    #[test]
1124    fn test_analyze_email_with_custom_header() {
1125        let email_data = b"From: sender@example.com\r\n\
1126To: receiver@example.com\r\n\
1127X-Custom-Header: custom-value\r\n\
1128Subject: Custom Header Test\r\n\
1129Content-Type: text/plain\r\n\
1130Date: 01 Jan 2024 12:00:00 +0000\r\n\
1131\r\n\
1132Test body"
1133            .to_vec();
1134
1135        let result = AnalyzeEmails::new(email_data, false).unwrap();
1136        assert_eq!(
1137            result.header.get("x-custom-header").unwrap(),
1138            "custom-value"
1139        );
1140    }
1141
1142    #[test]
1143    fn test_analyze_email_base64_body() {
1144        let email_data = b"From: sender@example.com\r\n\
1145To: receiver@example.com\r\n\
1146Subject: Base64 Test\r\n\
1147Content-Type: text/plain\r\n\
1148Content-Transfer-Encoding: base64\r\n\
1149Date: 01 Jan 2024 12:00:00 +0000\r\n\
1150\r\n\
1151SGVsbG8gV29ybGQ="
1152            .to_vec();
1153
1154        let result = AnalyzeEmails::new(email_data, false).unwrap();
1155        assert_eq!(result.body_text, "Hello World");
1156    }
1157
1158    #[test]
1159    fn test_from_parsing_simple_email() {
1160        let mut email = AnalyzeEmails::default();
1161        let result = email.from("user@example.com");
1162        assert!(result.contains_key("user@example.com"));
1163    }
1164
1165    #[test]
1166    fn test_email_encoded_with_name() {
1167        let mut email = AnalyzeEmails::default();
1168        let result = email.email_encoded(r#"John Doe <john@example.com>"#);
1169        assert_eq!(result.get("john@example.com").unwrap(), "John Doe");
1170    }
1171
1172    #[test]
1173    fn test_analyze_invalid_email_debug_writes_eml() {
1174        let invalid_data = b"invalid-email-without-separator".to_vec();
1175        let md5 = br_crypto::md5::encrypt_hex(&invalid_data);
1176        let path = env::current_dir().unwrap().join(format!("xygs-{md5}.eml"));
1177        let _ = fs::remove_file(&path);
1178
1179        let result = AnalyzeEmails::new(invalid_data.clone(), true);
1180        assert!(result.is_err());
1181        assert!(path.exists());
1182        assert_eq!(fs::read(&path).unwrap(), invalid_data);
1183
1184        let _ = fs::remove_file(path);
1185    }
1186
1187    #[test]
1188    fn test_header_colon_only_and_empty_value_skip() {
1189        let email_data = b"From: sender@example.com\r\n\
1190To: receiver@example.com\r\n\
1191Subject:Colon Header\r\n\
1192Content-Type:text/plain;charset=\"utf-8\"\r\n\
1193X-No-Space:value-without-space\r\n\
1194X-Empty:\r\n\
1195Date:Mon, 01 Jan 2024 12:00:00 GMT\r\n\
1196\r\n\
1197Body"
1198            .to_vec();
1199
1200        let result = AnalyzeEmails::new(email_data, false).unwrap();
1201        assert_eq!(result.subject, "Colon Header");
1202        assert_eq!(result.charset, "utf-8");
1203        assert_eq!(
1204            result.header.get("x-no-space").unwrap(),
1205            "value-without-space"
1206        );
1207        assert!(!result.header.contains_key("x-empty"));
1208    }
1209
1210    #[test]
1211    fn test_header_line_without_any_colon_is_skipped() {
1212        let email_data = b"From: sender@example.com\r\n\
1213To: receiver@example.com\r\n\
1214no-colon-line-here\r\n\
1215Subject: Test\r\n\
1216Content-Type: text/plain\r\n\
1217Date: 01 Jan 2024 12:00:00 +0000\r\n\
1218\r\n\
1219body"
1220            .to_vec();
1221
1222        let result = AnalyzeEmails::new(email_data, false).unwrap();
1223        assert_eq!(result.subject, "Test");
1224    }
1225
1226    #[test]
1227    fn test_multipart_body_parsing_for_all_supported_types() {
1228        let content_types = [
1229            "multipart/mixed",
1230            "multipart/alternative",
1231            "multipart/related",
1232            "multipart/report",
1233        ];
1234
1235        for content_type in content_types {
1236            let boundary = unique_token("boundary");
1237            let email_data = multipart_email(
1238                content_type,
1239                boundary.as_str(),
1240                "Content-Type: text/plain\r\nContent-Transfer-Encoding: 7bit\r\n\r\nHello multipart body",
1241            );
1242            let result = AnalyzeEmails::new(email_data, false).unwrap();
1243
1244            assert_eq!(result.content_type, content_type);
1245            assert_eq!(result.charset, "utf-8");
1246            assert!(result.body_text.contains("Hello multipart body"));
1247        }
1248    }
1249
1250    #[test]
1251    fn test_parts_header_parse_failure_debug_writes_file() {
1252        let mut email = AnalyzeEmails::default();
1253        email.debug = true;
1254        email.md5 = unique_token("head");
1255        email.files = object! {};
1256
1257        let path = env::current_dir()
1258            .unwrap()
1259            .join(format!("head-{}.eml", email.md5));
1260        let _ = fs::remove_file(&path);
1261
1262        let result = email.parts(
1263            "invalid-part-content".to_string(),
1264            "raw-email-data".to_string(),
1265        );
1266        assert!(result.is_err());
1267        assert!(path.exists());
1268        assert_eq!(fs::read_to_string(&path).unwrap(), "raw-email-data");
1269
1270        let _ = fs::remove_file(path);
1271    }
1272
1273    #[test]
1274    fn test_parts_unknown_header_is_ignored() {
1275        let mut email = AnalyzeEmails::default();
1276        email.charset = "utf-8".to_string();
1277        email.files = object! {};
1278
1279        let result = email.parts(
1280            "X-Unknown: value\r\nContent-Type: text/plain\r\n\r\nbody".to_string(),
1281            "raw".to_string(),
1282        );
1283        assert!(result.is_ok());
1284        assert_eq!(email.body_text, "body");
1285    }
1286
1287    #[test]
1288    fn test_parts_text_plain_with_name_as_attachment() {
1289        let mut email = AnalyzeEmails::default();
1290        email.charset = "utf-8".to_string();
1291        email.files = object! {};
1292
1293        let filename = format!("{}.txt", unique_token("plain-attachment"));
1294        let part = format!(
1295            "Content-Type:text/plain; name=\"{filename}\"\r\nContent-Transfer-Encoding:base64\r\n\r\nSGVsbG8gQXR0YWNobWVudA=="
1296        );
1297
1298        email.parts(part, "raw".to_string()).unwrap();
1299
1300        let body = b"Hello Attachment".to_vec();
1301        let md5 = br_crypto::md5::encrypt_hex(&body);
1302        let entry = &email.files[md5.as_str()];
1303
1304        assert_eq!(entry["name"].as_str().unwrap(), filename);
1305        assert_eq!(entry["content-type"].as_str().unwrap(), "txt");
1306        let path = entry["file"].as_str().unwrap();
1307        assert_eq!(fs::read(path).unwrap(), body);
1308
1309        let _ = fs::remove_file(path);
1310    }
1311
1312    #[test]
1313    fn test_parts_text_html_with_name_as_attachment() {
1314        let mut email = AnalyzeEmails::default();
1315        email.charset = "utf-8".to_string();
1316        email.files = object! {};
1317
1318        let filename = format!("{}.html", unique_token("html-attachment"));
1319        let part = format!(
1320            "Content-Type: text/html; name=\"{filename}\"\r\nContent-Transfer-Encoding: base64\r\n\r\nPGgxPkhlbGxvIEhUTUwgQXR0YWNobWVudDwvaDE+"
1321        );
1322
1323        email.parts(part, "raw".to_string()).unwrap();
1324
1325        let body = b"<h1>Hello HTML Attachment</h1>".to_vec();
1326        let md5 = br_crypto::md5::encrypt_hex(&body);
1327        let entry = &email.files[md5.as_str()];
1328
1329        assert_eq!(entry["name"].as_str().unwrap(), filename);
1330        assert_eq!(entry["content-type"].as_str().unwrap(), "html");
1331        let path = entry["file"].as_str().unwrap();
1332        assert_eq!(fs::read(path).unwrap(), body);
1333
1334        let _ = fs::remove_file(path);
1335    }
1336
1337    #[test]
1338    fn test_parts_content_disposition_filename() {
1339        let mut email = AnalyzeEmails::default();
1340        email.charset = "utf-8".to_string();
1341        email.files = object! {};
1342
1343        let filename = format!("{}.pdf", unique_token("filename"));
1344        let part = format!(
1345            "Content-Type: application/pdf\r\nContent-Transfer-Encoding: base64\r\nContent-Disposition: attachment; filename=\"{filename}\"\r\n\r\nSGVsbG8gUERG"
1346        );
1347
1348        email.parts(part, "raw".to_string()).unwrap();
1349
1350        let body = b"Hello PDF".to_vec();
1351        let md5 = br_crypto::md5::encrypt_hex(&body);
1352        let entry = &email.files[md5.as_str()];
1353
1354        assert_eq!(entry["name"].as_str().unwrap(), filename);
1355        assert_eq!(entry["content-type"].as_str().unwrap(), "application/pdf");
1356        let path = entry["file"].as_str().unwrap();
1357        assert_eq!(fs::read(path).unwrap(), body);
1358
1359        let _ = fs::remove_file(path);
1360    }
1361
1362    #[test]
1363    fn test_parts_content_disposition_filename_utf8_star() {
1364        let mut email = AnalyzeEmails::default();
1365        email.charset = "utf-8".to_string();
1366        email.files = object! {};
1367
1368        let part = "Content-Type: application/octet-stream\r\n\
1369Content-Transfer-Encoding: base64\r\n\
1370Content-Disposition: attachment; filename*=utf-8''hello%20world.txt\r\n\
1371\r\n\
1372SGVsbG8gVVJMIEZpbGU="
1373            .to_string();
1374
1375        email.parts(part, "raw".to_string()).unwrap();
1376
1377        let body = b"Hello URL File".to_vec();
1378        let md5 = br_crypto::md5::encrypt_hex(&body);
1379        let entry = &email.files[md5.as_str()];
1380
1381        assert_eq!(entry["name"].as_str().unwrap(), "hello world.txt");
1382        assert_eq!(
1383            entry["content-type"].as_str().unwrap(),
1384            "application/octet-stream"
1385        );
1386        let path = entry["file"].as_str().unwrap();
1387        assert_eq!(fs::read(path).unwrap(), body);
1388
1389        let _ = fs::remove_file(path);
1390    }
1391
1392    #[test]
1393    fn test_parts_nested_multipart() {
1394        let mut email = AnalyzeEmails::default();
1395        email.charset = "utf-8".to_string();
1396        email.files = object! {};
1397
1398        let boundary = unique_token("inner-boundary");
1399        let part = format!(
1400            "Content-Type: multipart/alternative; boundary=\"{boundary}\"\r\n\
1401Content-Transfer-Encoding: 7bit\r\n\
1402\r\n\
1403--{boundary}\r\n\
1404Content-Type: text/plain\r\n\
1405Content-Transfer-Encoding: 7bit\r\n\
1406\r\n\
1407Nested text body\r\n\
1408--{boundary}--\r\n"
1409        );
1410
1411        email.parts(part, "raw".to_string()).unwrap();
1412        assert!(email.body_text.contains("Nested text body"));
1413    }
1414
1415    #[test]
1416    fn test_parts_text_calendar_is_skipped() {
1417        let mut email = AnalyzeEmails::default();
1418        email.charset = "utf-8".to_string();
1419        email.files = object! {};
1420        email.body_text = "keep-me".to_string();
1421
1422        let part = "Content-Type: text/calendar\r\n\
1423Content-Transfer-Encoding: 7bit\r\n\
1424\r\n\
1425BEGIN:VCALENDAR"
1426            .to_string();
1427
1428        email.parts(part, "raw".to_string()).unwrap();
1429        assert_eq!(email.body_text, "keep-me");
1430    }
1431
1432    #[test]
1433    fn test_parts_application_content_types_are_saved() {
1434        let mut email = AnalyzeEmails::default();
1435        email.charset = "utf-8".to_string();
1436        email.files = object! {};
1437
1438        let content_types = [
1439            "application/octet-stream",
1440            "application/zip",
1441            "application/pdf",
1442            "image/jpeg",
1443            "image/png",
1444            "image/gif",
1445            "application/ics",
1446            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
1447            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
1448            "application/vnd.ms-excel",
1449        ];
1450        let body = b"Hello File".to_vec();
1451        let md5 = br_crypto::md5::encrypt_hex(&body);
1452
1453        for (idx, content_type) in content_types.iter().enumerate() {
1454            let filename = format!("{}-{idx}.bin", unique_token("app-attachment"));
1455            let part = format!(
1456                "Content-Type: {content_type}; name=\"{filename}\"\r\nContent-Transfer-Encoding: base64\r\n\r\nSGVsbG8gRmlsZQ=="
1457            );
1458
1459            email.parts(part, "raw".to_string()).unwrap();
1460
1461            let entry = &email.files[md5.as_str()];
1462            assert_eq!(entry["name"].as_str().unwrap(), filename);
1463            assert_eq!(entry["content-type"].as_str().unwrap(), *content_type);
1464
1465            let path = entry["file"].as_str().unwrap();
1466            assert_eq!(fs::read(path).unwrap(), body);
1467            let _ = fs::remove_file(path);
1468        }
1469    }
1470
1471    #[test]
1472    fn test_parts_unknown_content_type_debug_writes_file() {
1473        let mut email = AnalyzeEmails::default();
1474        email.debug = true;
1475        email.md5 = unique_token("content-type");
1476        email.charset = "utf-8".to_string();
1477        email.files = object! {};
1478
1479        let path = env::current_dir()
1480            .unwrap()
1481            .join(format!("content_type-{}.eml", email.md5));
1482        let _ = fs::remove_file(&path);
1483
1484        let part = "Content-Type: application/x-custom\r\n\
1485Content-Transfer-Encoding: 7bit\r\n\
1486\r\n\
1487custom body"
1488            .to_string();
1489
1490        email
1491            .parts(part, "raw-unknown-content-type".to_string())
1492            .unwrap();
1493        assert!(path.exists());
1494        assert_eq!(
1495            fs::read_to_string(&path).unwrap(),
1496            "raw-unknown-content-type"
1497        );
1498
1499        let _ = fs::remove_file(path);
1500    }
1501
1502    #[test]
1503    fn test_encoded_line_paths() {
1504        let mut email = AnalyzeEmails::default();
1505
1506        assert_eq!(email.encoded_line("plain text"), "plain text");
1507        assert_eq!(email.encoded_line("=?UTF-8?Q?=48=65=6C=6C=6F?="), "Hello");
1508        assert_eq!(
1509            email.encoded_line("=?UTF-8?X?UnknownEncoding?="),
1510            "UnknownEncoding"
1511        );
1512    }
1513
1514    #[test]
1515    fn test_encoded_concatenated_mime_words() {
1516        let mut email = AnalyzeEmails::default();
1517
1518        // Adjacent encoded-words without space: =?utf-8?B?...?==?utf-8?B?...?=
1519        // This is the "nvoice" bug — "invoice.pdf" split across two encoded-words
1520        let result = email.encoded("=?UTF-8?B?aW52b2ljZQ==?==?UTF-8?B?LnBkZg==?=");
1521        assert_eq!(result, "invoice.pdf");
1522
1523        // Q-encoding variant
1524        let result = email.encoded("=?UTF-8?Q?inv?==?UTF-8?Q?oice.pdf?=");
1525        assert_eq!(result, "invoice.pdf");
1526
1527        // Single encoded-word still works
1528        let result = email.encoded("=?UTF-8?B?aW52b2ljZS5wZGY=?=");
1529        assert_eq!(result, "invoice.pdf");
1530
1531        // Mixed: encoded-word + space + encoded-word (already worked)
1532        let result = email.encoded("=?UTF-8?Q?hello?= =?UTF-8?Q?world?=");
1533        assert_eq!(result, "helloworld");
1534    }
1535
1536    #[test]
1537    fn test_datetime_parsing_variants_and_error() {
1538        let mut email = AnalyzeEmails::default();
1539
1540        email
1541            .datetime("Mon, 01 Jan 2024 12:00:00 GMT (UTC)")
1542            .unwrap();
1543        assert!(email.timestamp > 0);
1544        assert!(!email.datetime.is_empty());
1545
1546        let err = email.datetime("invalid datetime format").unwrap_err();
1547        assert!(err.to_string().contains("时间解析失败"));
1548    }
1549
1550    #[test]
1551    fn test_datetime_us_style_month_first() {
1552        let mut email = AnalyzeEmails::default();
1553        // 美式日期格式: Mon dd YYYY (如 "Nov 4 2025 22:19:26 +0800")
1554        email.datetime("Mon, Nov 4 2025 22:19:26 +0800").unwrap();
1555        assert!(email.timestamp > 0);
1556        assert!(!email.datetime.is_empty());
1557        // 标准格式仍然正常工作
1558        email.datetime("Tue, 15 Oct 2024 08:30:00 +0000").unwrap();
1559        assert!(email.timestamp > 0);
1560    }
1561    #[test]
1562    fn test_email_encoded_with_encoded_and_quoted_names() {
1563        let mut email = AnalyzeEmails::default();
1564        let result = email.email_encoded(
1565            "\"Quoted User\" <quoted@example.com>, =?UTF-8?B?5rWL6K+V?= <encoded@example.com>",
1566        );
1567
1568        assert_eq!(result.get("quoted@example.com").unwrap(), "Quoted User");
1569        assert_eq!(result.get("encoded@example.com").unwrap(), "测试");
1570    }
1571
1572    #[test]
1573    fn test_set_files_base64_decodes_and_detects_extension() {
1574        let mut email = AnalyzeEmails::default();
1575        email.files = object! {};
1576
1577        let filename = format!("{}.txt", unique_token("set-files"));
1578        email
1579            .set_files(
1580                ContentTransferEncoding::Base64,
1581                "c2V0IGZpbGVzIGJvZHk=\r\n",
1582                filename.as_str(),
1583                "".to_string(),
1584            )
1585            .unwrap();
1586
1587        let body = b"set files body".to_vec();
1588        let md5 = br_crypto::md5::encrypt_hex(&body);
1589        let entry = &email.files[md5.as_str()];
1590
1591        assert_eq!(entry["name"].as_str().unwrap(), filename);
1592        assert_eq!(entry["content-type"].as_str().unwrap(), "txt");
1593        let path = entry["file"].as_str().unwrap();
1594        assert_eq!(fs::read(path).unwrap(), body);
1595
1596        let _ = fs::remove_file(path);
1597    }
1598
1599    #[test]
1600    fn test_header_parses_colon_without_space_separator() {
1601        let email_data = b"From:sender@example.com\r\n\
1602To:receiver@example.com\r\n\
1603X-Test:nospaceval\r\n\
1604Content-Type:text/plain\r\n\
1605Date: 01 Jan 2024 12:00:00 +0000\r\n\
1606\r\n\
1607body"
1608            .to_vec();
1609
1610        let result = AnalyzeEmails::new(email_data, false).unwrap();
1611        assert_eq!(result.header.get("x-test").unwrap(), "nospaceval");
1612    }
1613
1614    #[test]
1615    fn test_multipart_header_without_boundary_keeps_boundary_empty() {
1616        let email_data = b"From: sender@example.com\r\n\
1617To: receiver@example.com\r\n\
1618Subject: Multipart Without Boundary\r\n\
1619Content-Type: multipart/mixed; charset=utf-8\r\n\
1620Date: 01 Jan 2024 12:00:00 +0000\r\n\
1621\r\n\
1622body without multipart markers"
1623            .to_vec();
1624
1625        let result = AnalyzeEmails::new(email_data, false).unwrap();
1626        assert_eq!(result.content_type, "multipart/mixed");
1627        assert!(result.boundary.is_empty());
1628    }
1629
1630    #[test]
1631    fn test_body_multipart_boundary_not_found_uses_original_body() {
1632        let boundary = unique_token("missing-boundary");
1633        let email_data = format!(
1634            "From: sender@example.com\r\n\
1635To: receiver@example.com\r\n\
1636Subject: Boundary Missing In Body\r\n\
1637Content-Type: multipart/mixed;boundary=\"{boundary}\";charset=\"utf-8\"\r\n\
1638Content-Transfer-Encoding: 7bit\r\n\
1639Date: 01 Jan 2024 12:00:00 +0000\r\n\
1640\r\n\
1641this body intentionally has no boundary lines"
1642        )
1643        .into_bytes();
1644
1645        let result = AnalyzeEmails::new(email_data, false).unwrap();
1646        assert_eq!(result.content_type, "multipart/mixed");
1647        assert_eq!(result.boundary, boundary);
1648        assert!(result.body_text.is_empty());
1649        assert!(result.body_html.is_empty());
1650    }
1651
1652    #[test]
1653    fn test_body_multipart_skips_empty_part_segments() {
1654        let boundary = unique_token("empty-part");
1655        let email_data = format!(
1656            "From: sender@example.com\r\n\
1657To: receiver@example.com\r\n\
1658Subject: Empty Multipart Segment\r\n\
1659Content-Type: multipart/mixed; boundary=\"{boundary}\"\r\n\
1660Date: 01 Jan 2024 12:00:00 +0000\r\n\
1661\r\n\
1662--{boundary}\r\n\
1663Content-Type: text/plain\r\n\
1664Content-Transfer-Encoding: 7bit\r\n\
1665\r\n\
1666first text\r\n\
1667--{boundary}\r\n\
1668\r\n\
1669--{boundary}--\r\n"
1670        )
1671        .into_bytes();
1672
1673        let result = AnalyzeEmails::new(email_data, false).unwrap();
1674        assert_eq!(result.content_type, "multipart/mixed");
1675        assert!(result.body_text.contains("first text"));
1676    }
1677
1678    #[test]
1679    fn test_body_unknown_content_type_skips_gracefully() {
1680        let email_data = b"From: a@b.com\r\n\
1681Content-Type: application/json\r\n\
1682Date: 01 Jan 2024 12:00:00 +0000\r\n\
1683\r\n\
1684{\"key\":\"value\"}"
1685            .to_vec();
1686        let result = AnalyzeEmails::new(email_data, false);
1687        assert!(result.is_ok());
1688        let email = result.unwrap();
1689        assert!(email.body_text.is_empty());
1690        assert!(email.body_html.is_empty());
1691    }
1692
1693    #[test]
1694    fn test_parts_ignores_header_line_without_colon() {
1695        let mut email = AnalyzeEmails::default();
1696        email.charset = "utf-8".to_string();
1697        email.files = object! {};
1698
1699        let part = "NoColonHeader\r\n\
1700Content-Type: text/plain\r\n\
1701Content-Transfer-Encoding: 7bit\r\n\
1702\r\n\
1703plain body"
1704            .to_string();
1705
1706        email.parts(part, "raw".to_string()).unwrap();
1707        assert_eq!(email.body_text, "plain body");
1708    }
1709
1710    #[test]
1711    fn test_parts_content_type_boundary_with_semicolon_suffix() {
1712        let mut email = AnalyzeEmails::default();
1713        email.charset = "utf-8".to_string();
1714        email.files = object! {};
1715
1716        let boundary = unique_token("inner-semi");
1717        let part = format!(
1718            "Content-Type: multipart/alternative; boundary=\"{boundary}\"; charset=\"utf-8\"\r\n\
1719Content-Transfer-Encoding: 7bit\r\n\
1720\r\n\
1721--{boundary}\r\n\
1722Content-Type: text/plain\r\n\
1723Content-Transfer-Encoding: 7bit\r\n\
1724\r\n\
1725nested plain body\r\n\
1726--{boundary}--\r\n"
1727        );
1728
1729        email.parts(part, "raw".to_string()).unwrap();
1730        assert!(email.body_text.contains("nested plain body"));
1731    }
1732
1733    #[test]
1734    fn test_parts_text_html_without_filename_sets_body_html() {
1735        let mut email = AnalyzeEmails::default();
1736        email.charset = "utf-8".to_string();
1737        email.files = object! {};
1738
1739        let part = "Content-Type: text/html\r\n\
1740Content-Transfer-Encoding: 7bit\r\n\
1741\r\n\
1742<p>inline html body</p>"
1743            .to_string();
1744
1745        email.parts(part, "raw".to_string()).unwrap();
1746        assert!(email.body_html.contains("<p>inline html body</p>"));
1747    }
1748
1749    #[test]
1750    fn test_parts_nested_multipart_outer_boundary_not_found_skips_empty_part() {
1751        let mut email = AnalyzeEmails::default();
1752        email.charset = "utf-8".to_string();
1753        email.files = object! {};
1754        email.boundary = unique_token("outer-boundary");
1755
1756        let inner_boundary = unique_token("inner-boundary");
1757        let part = format!(
1758            "Content-Type: multipart/alternative; boundary=\"{inner_boundary}\"\r\n\
1759Content-Transfer-Encoding: 7bit\r\n\
1760\r\n\
1761--{inner_boundary}\r\n\
1762Content-Type: text/plain\r\n\
1763Content-Transfer-Encoding: 7bit\r\n\
1764\r\n\
1765nested plain text\r\n\
1766--{inner_boundary}\r\n\
1767\r\n\
1768--{inner_boundary}--\r\n"
1769        );
1770
1771        email.parts(part, "raw".to_string()).unwrap();
1772        assert!(email.body_text.contains("nested plain text"));
1773    }
1774
1775    #[test]
1776    fn test_set_files_returns_error_when_create_fails() {
1777        let mut email = AnalyzeEmails::default();
1778        email.files = object! {};
1779
1780        let filename = "test\0file.txt";
1781
1782        let err = email
1783            .set_files(
1784                ContentTransferEncoding::Bit7,
1785                "ignored",
1786                filename,
1787                "application/octet-stream".to_string(),
1788            )
1789            .unwrap_err();
1790
1791        assert!(err.to_string().contains("打开（创建）临时文件"));
1792    }
1793
1794    #[test]
1795    fn test_content_transfer_encoding_decode_quoted_printable() {
1796        let mut enc = ContentTransferEncoding::QuotedPrintable;
1797        let data = b"Hello=20World=21".to_vec();
1798        let result = enc.decode(data).unwrap();
1799        assert_eq!(result, b"Hello World!");
1800    }
1801}
br_email/analyze.rs

br_email/
analyze.rs