1use br_crypto::encoding::code_to_utf8;
2use chrono::{DateTime, Local, TimeZone};
3use json::{object, JsonValue};
4use regex::Regex;
5use std::collections::HashMap;
6use std::ffi::OsStr;
7use std::io::{Error, ErrorKind, Write};
8use std::{env, fs, io};
9
/// Parsed representation of a single e-mail message.
///
/// Instances are produced by `AnalyzeEmails::new`, which fills the fields from
/// the message headers and body.
#[derive(Debug)]
pub struct AnalyzeEmails {
    /// When `true`, input that cannot be parsed is dumped to a file in the
    /// current working directory.
    pub debug: bool,
    /// Headers that have no dedicated field, keyed by lower-cased header name.
    pub header: HashMap<String, String>,
    /// Value of the `MIME-Version` header.
    pub mime_version: String,
    /// Multipart boundary taken from the top-level `Content-Type` header.
    boundary: String,
    /// MD5 hex digest of the message bytes passed to `new`.
    pub md5: String,
    /// Length in bytes of the message passed to `new`.
    pub size: usize,
    /// Unix timestamp (seconds) parsed from the `Date` header.
    pub timestamp: i64,
    /// `Date` header rendered in the local time zone as `%Y-%m-%d %H:%M:%S`.
    pub datetime: String,
    /// Decoded `Subject` header.
    pub subject: String,
    /// Sender from the `From` header: address mapped to display name.
    pub from: HashMap<String, String>,
    /// Recipients from the `To` header: address mapped to display name.
    pub to: HashMap<String, String>,
    /// Recipients from the `Cc` header: address mapped to display name.
    pub cc: HashMap<String, String>,
    /// Addresses from the `Reply-To` header: address mapped to display name.
    pub replyto: HashMap<String, String>,
    /// Lower-cased media type of the top-level `Content-Type` header.
    pub content_type: String,
    /// Transfer encoding declared by the top-level `Content-Transfer-Encoding` header.
    pub content_transfer_encoding: ContentTransferEncoding,
    /// Raw value of the `Sender` header.
    pub sender: String,
    /// Plain-text body.
    pub body_text: String,
    /// HTML body.
    pub body_html: String,
    /// Attachments, keyed by the MD5 of their decoded content; each entry holds
    /// `name`, `md5`, `size`, `content-type` and `file` (path of the temporary copy).
    pub files: JsonValue,
    /// Character set from the top-level `Content-Type` header.
    pub charset: String,
}
44
45impl AnalyzeEmails {
46 pub fn new(mut data: Vec<u8>, debug: bool) -> io::Result<AnalyzeEmails> {
47 let md5 = br_crypto::md5::encrypt_hex(&data.clone())
48 .leak()
49 .to_string();
50 let size = data.len();
51 let data_string = unsafe { String::from_utf8_unchecked(data.clone()) };
52 if data_string.contains("\n\n") {
53 let updated_string = data_string.replace("\n", "\r\n");
54 data = updated_string.as_bytes().to_vec();
55 }
56
57 let subsequence = "\r\n\r\n".as_bytes();
58
59 let (header, body) = match data
60 .windows(subsequence.len())
61 .position(|window| window == subsequence)
62 {
63 None => {
64 if debug {
65 fs::write(
66 format!(
67 "{}/xygs-{}.eml",
68 env::current_dir().unwrap().to_str().unwrap(),
69 md5
70 ),
71 data.clone(),
72 )?;
73 }
74 return Err(Error::other(format!("协议格式错误: {md5}")));
75 }
76 Some(e) => (data[..e].to_vec(), data[e + 4..].to_vec()),
77 };
78 let mut that = Self {
79 debug,
80 header: Default::default(),
81 mime_version: "".to_string(),
82 boundary: "".to_string(),
83 md5,
84 size,
85 timestamp: 0,
86 subject: "".to_string(),
87 from: Default::default(),
88 to: Default::default(),
89 cc: Default::default(),
90 replyto: Default::default(),
91 datetime: "".to_string(),
92 content_type: "".to_string(),
93 content_transfer_encoding: ContentTransferEncoding::Bit7,
94 sender: "".to_string(),
95 body_text: "".to_string(),
96 body_html: "".to_string(),
97 files: object! {},
98 charset: "utf-8".to_string(),
99 };
100 that.header(header)?;
101 that.body(body, data_string)?;
102 Ok(that)
103 }
104
105 fn header(&mut self, data: Vec<u8>) -> io::Result<()> {
106 let data = unsafe { String::from_utf8_unchecked(data) };
107 let data = data.replace("\r\n\t", "").replace("\r\n ", " ").leak();
108 for item in data.lines() {
109 let (key, value) = match item.find(": ") {
110 Some(e) => (&item[..e], &item[e + 2..]),
111 None => match item.find(":") {
112 Some(e) => (&item[..e], &item[e + 1..]),
113 None => continue,
114 },
115 };
116 let name = key.to_lowercase().leak();
117 if value.is_empty() {
118 continue;
119 }
120 match key.to_lowercase().as_str() {
121 "mime-version" => self.mime_version = value.to_string(),
122 "from" => {
123 self.from = self.from(value);
124 }
125 "sender" => {
126 self.sender = value.to_string();
127 }
128 "to" => {
129 self.to = self.email_encoded(value);
130 }
131 "cc" => {
132 self.cc = self.email_encoded(value);
133 }
134 "reply-to" => {
135 self.replyto = self.email_encoded(value);
136 }
137 "subject" => {
138 self.subject = self.subject(value.to_string());
139 }
140 "content-type" => {
141 let types = value.split(";").collect::<Vec<&str>>();
142 self.content_type = types[0].trim().to_lowercase().to_string();
143 match self.content_type.as_str() {
144 "multipart/mixed"
145 | "multipart/alternative"
146 | "multipart/related"
147 | "multipart/report" => match types[1].find("boundary=") {
148 None => {}
149 Some(e) => {
150 let boundary = &types[1][e..];
151 self.boundary = boundary
152 .trim()
153 .trim_start_matches("boundary=")
154 .trim_start_matches("\"")
155 .trim_end_matches("\"")
156 .to_string();
157 }
158 },
159 _ => {}
160 }
161 if types.len() > 1 {
162 for item in types.iter() {
163 if item.contains("charset=") {
164 self.charset = item
165 .trim_start_matches("charset=")
166 .trim_start_matches("\"")
167 .trim_end_matches("\"")
168 .to_string();
169 }
170 }
171 }
172 }
173 "content-transfer-encoding" => {
174 self.content_transfer_encoding = ContentTransferEncoding::from(value);
175 }
176 "date" => self.datetime(value)?,
177 _ => {
178 self.header
179 .insert(name.trim().to_string(), value.to_string());
180 }
181 }
182 }
183 Ok(())
184 }
185 fn body(&mut self, data: Vec<u8>, old_data: String) -> io::Result<()> {
186 match self.content_type.to_lowercase().as_str() {
187 "text/html" => {
188 let data = self.content_transfer_encoding.decode(data)?;
189 let res = code_to_utf8(self.charset.as_str(), data.clone());
190 self.body_html = res;
191 }
192 "text/plain" => {
193 let data = self.content_transfer_encoding.decode(data)?;
194 let res = code_to_utf8(self.charset.as_str(), data.clone());
195 self.body_text = res;
196 }
197 "multipart/mixed"
198 | "multipart/alternative"
199 | "multipart/related"
200 | "multipart/report" => {
201 let data = self.content_transfer_encoding.decode(data.clone())?;
202 let mut parts = code_to_utf8(self.charset.as_str(), data.clone());
203 let mut parts_list = vec![];
204 let mut text = String::new();
205
206 parts = match parts.find(self.boundary.as_str()) {
207 None => parts,
208 Some(e) => parts[e..].to_string(),
209 };
210 for item in parts.lines() {
211 if item.contains(self.boundary.as_str()) && text.is_empty() {
212 continue;
213 }
214 if item.contains(self.boundary.as_str()) && text.clone() != "" {
215 parts_list.push(text.clone());
216 text = String::new();
217 continue;
218 }
219 text = format!("{text}{item}\r\n");
220 }
221 for part in parts_list {
222 if part.trim().is_empty() {
223 continue;
224 }
225 self.parts(part.to_string(), old_data.clone())?;
226 }
227 }
228 _ => {
229 return Err(Error::new(
230 ErrorKind::NotFound,
231 format!("未知body类型: {}", self.content_type),
232 ));
233 }
234 }
235 Ok(())
236 }
237 fn parts(&mut self, data: String, old_data: String) -> io::Result<()> {
239 let (header, body) = match data.find("\r\n\r\n") {
240 None => {
241 if self.debug {
242 fs::write(
243 format!(
244 "{}/head-{}.eml",
245 env::current_dir().unwrap().to_str().unwrap(),
246 self.md5
247 ),
248 old_data.clone(),
249 )?;
250 }
251 return Err(Error::other("解析附件头失败"));
252 }
253 Some(e) => (
254 &data[..e]
255 .replace("\r\n\t", " ")
256 .replace("\r\n ", " ")
257 .leak()
258 .lines(),
259 &data[e + 4..],
260 ),
261 };
262
263 let mut filename = "".to_string();
264 let mut content_type = "";
265 let mut boundary = "";
266 let mut content_transfer_encoding = ContentTransferEncoding::None;
267 for item in header.clone() {
268 let (key, value) = match item.find(": ") {
269 Some(e) => (&item[..e], &item[e + 2..]),
270 None => match item.find(":") {
271 Some(e) => (&item[..e], &item[e + 1..]),
272 None => continue,
273 },
274 };
275
276 let name = key.to_lowercase();
277
278 match name.trim() {
279 "content-transfer-encoding" => {
280 content_transfer_encoding = ContentTransferEncoding::from(value)
281 }
282 "content-type" => {
283 let types = value.trim().split(";").collect::<Vec<&str>>();
284 content_type = types[0].trim();
285 let name = types
286 .iter()
287 .filter(|&x| x.trim().starts_with("name="))
288 .map(|&x| x.trim().to_string())
289 .collect::<Vec<String>>();
290 if !name.is_empty() {
291 let name = name[0].trim_start_matches("name=");
292 filename = self.encoded(name);
293 }
294 match value.find("boundary=") {
295 None => {}
296 Some(i) => {
297 boundary = &value[i + 9..];
298 boundary = match boundary.find(";") {
299 None => boundary,
300 Some(i) => &boundary[..i],
301 };
302 boundary = boundary.trim_start_matches("\"").trim_end_matches("\"");
303 }
304 }
305 }
306 "content-id"
307 | "content-length"
308 | "mime-version"
309 | "content-description"
310 | "date"
311 | "x-attachment-id" => {}
312 "content-disposition" => {
313 if filename.is_empty() && value.contains("filename=") {
314 filename = value.split("filename=").collect::<Vec<&str>>()[1]
315 .trim_start_matches("\"")
316 .trim_end_matches("\"")
317 .to_string();
318 }
319 if filename.is_empty() && value.contains("filename*=utf-8''") {
320 filename = value.split("filename*=utf-8''").collect::<Vec<&str>>()[1]
321 .trim_start_matches("\"")
322 .trim_end_matches("\"")
323 .to_string();
324 filename = br_crypto::encoding::urlencoding_decode(filename.as_str());
325 }
326 }
327 _ => {
328 return Err(Error::new(
329 ErrorKind::NotFound,
330 format!("parts 未知 header 类型: {name} [{item}]"),
331 ));
332 }
333 }
334 }
335
336 match content_type {
337 "text/plain" => {
338 if filename.is_empty() {
339 let res = content_transfer_encoding.decode(body.as_bytes().to_vec())?;
340 let text = code_to_utf8(self.charset.as_str(), res.clone());
341 self.body_text = text;
342 } else {
343 self.set_files(
344 content_transfer_encoding,
345 body,
346 filename.as_str(),
347 "".to_string(),
348 )?;
349 }
350 }
351 "text/html" | "text/x-amp-html" => {
352 if filename.is_empty() {
353 let res = content_transfer_encoding.decode(body.as_bytes().to_vec())?;
354 self.body_html = code_to_utf8(self.charset.as_str(), res.clone());
355 } else {
356 self.set_files(
357 content_transfer_encoding,
358 body,
359 filename.as_str(),
360 "".to_string(),
361 )?;
362 }
363 }
364 "multipart/mixed" | "multipart/alternative" | "multipart/related" => {
365 let data = self
366 .content_transfer_encoding
367 .decode(body.as_bytes().to_vec())?;
368 let mut parts = code_to_utf8(self.charset.as_str(), data.clone());
369
370 parts = match parts.find(self.boundary.as_str()) {
371 None => parts,
372 Some(e) => parts[e..].to_string(),
373 };
374
375 let mut parts_list = vec![];
376 let mut text = String::new();
377 for item in parts.lines() {
378 if item.contains(boundary) && text.is_empty() {
379 continue;
380 }
381 if item.contains(boundary) && !text.is_empty() {
382 parts_list.push(text);
383 text = String::new();
384 continue;
385 }
386 text = format!("{text}{item}\r\n");
387 }
388 for part in parts_list {
389 if part.trim().is_empty() {
390 continue;
391 }
392 self.parts(part.to_string(), old_data.clone())?;
393 }
394 }
395 "text/calendar" => {}
396 "application/octet-stream"
397 | "application/zip"
398 | "application/pdf"
399 | "image/jpeg"
400 | "application/ics"
401 | "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => {
402 if !filename.is_empty() {
403 self.set_files(
404 content_transfer_encoding,
405 body,
406 filename.as_str(),
407 content_type.to_string(),
408 )?;
409 }
410 }
411 _ => {
412 if self.debug {
413 fs::write(
414 format!(
415 "{}/content_type-{}.eml",
416 env::current_dir().unwrap().to_str().unwrap(),
417 self.md5
418 ),
419 old_data.clone(),
420 )?;
421 }
422 return Err(Error::new(
423 ErrorKind::NotFound,
424 format!("未知 parts content_type 类型: {content_type}"),
425 ));
426 }
427 }
428 Ok(())
429 }
430 pub fn from(&mut self, value: &str) -> HashMap<String, String> {
431 let mut r = value
432 .split("<")
433 .filter(|x| !x.trim().is_empty())
434 .map(|x| x.trim())
435 .collect::<Vec<&str>>();
436 if r[0].starts_with("\"") && r[0].ends_with("\"") {
437 r[0] = r[0].trim_start_matches("\"").trim_end_matches("\"").trim();
438 }
439 let mut emails = HashMap::new();
440 if r.len() == 1 {
441 let name = r[0].trim_end_matches(">").to_string();
442 emails.insert(name.clone(), name);
443 } else {
444 let name = self.encoded(r[0].trim());
445 let email = r[1].trim_end_matches(">").to_string();
446 emails.insert(email, name);
447 }
448 emails
449 }
450 fn subject(&mut self, value: String) -> String {
451 let value = value.replace("?==?", "?=\r\n\t=?");
452 if !value.contains("=?") && !value.contains("?=") {
453 return value.to_string();
454 }
455 let list = value.split("\r\n\t").collect::<Vec<&str>>();
456 let mut txt = vec![];
457 for item in list {
458 txt.push(self.encoded(item));
459 }
460 txt.join("")
461 }
462
463 fn encoded(&mut self, value: &str) -> String {
464 let t = value.trim_start_matches("\"").trim_end_matches("\"");
465 if t.contains("=?") && t.contains("?=") {
466 let l = t.split(" ").collect::<Vec<&str>>();
467 let mut txt = vec![];
468 for item in l {
469 txt.push(self.encoded_line(item));
470 }
471 txt.join("")
472 } else {
473 t.to_string()
474 }
475 }
476 fn encoded_line(&mut self, value: &str) -> String {
478 let line = value.split("?").collect::<Vec<&str>>();
479 if line.len() == 1 {
480 return value.to_string();
481 }
482 let charset = line[1].to_lowercase().to_string().leak();
483 let code = line[2].to_uppercase();
484 let data = line[3];
485
486 let strs = match code.as_str() {
487 "B" => br_crypto::base64::decode_u8(data),
488 "Q" => br_crypto::qp::decode(data).unwrap_or(vec![]),
489 _ => data.as_bytes().to_vec(),
490 };
491 let text = code_to_utf8(charset, strs.clone());
492 text.chars().filter(|&x| x != '\u{200b}').collect()
493 }
494
495 fn datetime(&mut self, value: &str) -> io::Result<()> {
497 let re = Regex::new(r"\s*\(.*\)$").unwrap();
498 let datetime = re.replace(value, "").to_string();
499 let datetime = datetime.replace("GMT", "+0000").to_string();
500 let datetime = match datetime.find(",") {
501 None => datetime,
502 Some(i) => datetime[i + 1..].parse().unwrap(),
503 };
504 let datetime = match DateTime::parse_from_str(datetime.as_str(), "%d %b %Y %H:%M:%S %z") {
505 Ok(e) => e,
506 Err(e) => return Err(Error::other(format!("时间解析失败: {e} [{datetime:?}]"))),
507 };
508 self.timestamp = datetime.timestamp();
509 self.datetime = Local
510 .timestamp_opt(self.timestamp, 0)
511 .unwrap()
512 .with_timezone(&Local)
513 .format("%Y-%m-%d %H:%M:%S")
514 .to_string();
515 Ok(())
516 }
517 pub fn email_encoded(&mut self, value: &str) -> HashMap<String, String> {
518 let list = value.split(",").map(|x| x.trim()).collect::<Vec<&str>>();
519 let mut emails = HashMap::new();
520 for item in list {
521 let mut r = item.split(" <").collect::<Vec<&str>>();
522 if r[0].starts_with("\"") && r[0].ends_with("\"") {
523 r[0] = r[0].trim_start_matches("\"").trim_end_matches("\"");
524 }
525 if r.len() == 1 {
526 let name = r[0]
527 .trim_start_matches("<")
528 .trim_end_matches(">")
529 .to_string();
530 emails.insert(name.clone(), name);
531 } else {
532 let name = self.encoded(r[0].trim());
533 let email = r[1].trim_end_matches(">").to_string();
534 emails.insert(email, name);
535 }
536 }
537 emails
538 }
539 fn set_files(
540 &mut self,
541 mut content_transfer_encoding: ContentTransferEncoding,
542 body: &str,
543 filename: &str,
544 mut content_type: String,
545 ) -> io::Result<()> {
546 let mut data = "";
547 if let ContentTransferEncoding::Base64 = content_transfer_encoding {
548 let mut text = "".to_string();
549 for line in body.lines() {
550 text += line;
551 }
552 data = text.leak();
553 }
554
555 let body = content_transfer_encoding.decode(data.as_bytes().to_vec())?;
556 let md5 = br_crypto::md5::encrypt_hex(&body.clone());
557 let size = body.len();
558 let mut temp_dir = env::temp_dir();
559 temp_dir.push(filename);
560 let path_temp_dir = temp_dir.clone();
561
562 let mut temp_file = match fs::File::create(temp_dir.clone()) {
563 Ok(e) => e,
564 Err(e) => {
565 return Err(Error::other(format!(
566 "打开(创建)临时文件: {e} [{filename}]"
567 )))
568 }
569 };
570
571 if temp_file.write(body.as_slice()).is_ok() {
572 if content_type.is_empty() {
573 content_type = path_temp_dir
574 .extension()
575 .unwrap_or(OsStr::new("unknown"))
576 .to_str()
577 .unwrap_or("unknown")
578 .to_string();
579 }
580
581 self.files[md5.as_str()] = object! {
582 name:filename,
583 md5:md5.clone(),
584 size:size,
585 "content-type":content_type.clone(),
586 file:temp_dir.to_str()
587 };
588 };
589 Ok(())
590 }
591}
592
593impl Default for AnalyzeEmails {
594 fn default() -> Self {
595 Self {
596 debug: false,
597 header: Default::default(),
598 mime_version: "".to_string(),
599 boundary: "".to_string(),
600 md5: "".to_string(),
601 size: 0,
602 timestamp: 0,
603 datetime: "".to_string(),
604 subject: "".to_string(),
605 from: Default::default(),
606 to: Default::default(),
607 cc: Default::default(),
608 replyto: Default::default(),
609 content_type: "".to_string(),
610 content_transfer_encoding: ContentTransferEncoding::None,
611 sender: "".to_string(),
612 body_text: "".to_string(),
613 body_html: "".to_string(),
614 files: JsonValue::Null,
615 charset: "".to_string(),
616 }
617 }
618}
619
/// Transfer encoding of a message body or of one multipart section, as named
/// by a `Content-Transfer-Encoding` header.
#[derive(Debug)]
pub enum ContentTransferEncoding {
    /// `quoted-printable`.
    QuotedPrintable,
    /// `base64`.
    Base64,
    /// `binary`; content is used as is.
    Binary,
    /// `8bit`; content is used as is.
    Bit8,
    /// `7bit`; content is used as is.
    Bit7,
    /// No encoding declared, or an unrecognised one; content is used as is.
    None,
}
645
646impl ContentTransferEncoding {
647 fn from(value: &str) -> Self {
648 match value.to_lowercase().as_str() {
649 "7bit" => Self::Bit7,
650 "8bit" => Self::Bit8,
651 "binary" => Self::Binary,
652 "base64" => Self::Base64,
653 "quoted-printable" => Self::QuotedPrintable,
654 _ => Self::None,
655 }
656 }
657 fn decode(&mut self, mut data: Vec<u8>) -> io::Result<Vec<u8>> {
658 let res = match self {
659 ContentTransferEncoding::QuotedPrintable => br_crypto::qp::decode(data)?,
660 ContentTransferEncoding::Base64 => {
661 let str = unsafe { String::from_utf8_unchecked(data) };
662 let mut text = "".to_string();
663 for line in str.lines() {
664 text += line;
665 }
666 data = text.leak().as_bytes().to_vec();
667 br_crypto::base64::decode_u8(data)
668 }
669 ContentTransferEncoding::Binary => data,
670 ContentTransferEncoding::Bit8 => data,
671 ContentTransferEncoding::Bit7 => data,
672 ContentTransferEncoding::None => data,
673 };
674 Ok(res)
675 }
676}