1use br_crypto::encoding::code_to_utf8;
2use br_crypto::hash::u8_to_md5;
3use chrono::{DateTime, Local, TimeZone};
4use json::{object, JsonValue};
5use regex::Regex;
6use std::collections::HashMap;
7use std::ffi::OsStr;
8use std::io::{Error, ErrorKind, Write};
9use std::{env, fs, io};
10
11#[derive(Debug)]
13pub struct AnalyzeEmails {
14 pub debug: bool,
15 pub header: HashMap<String, String>,
16 pub mime_version: String,
17 boundary: String,
18 pub md5: String,
19 pub size: usize,
20 pub timestamp: i64,
22 pub datetime: String,
24 pub subject: String,
26 pub from: HashMap<String, String>,
28 pub to: HashMap<String, String>,
30 pub cc: HashMap<String, String>,
32 pub replyto: HashMap<String, String>,
34 pub content_type: String,
36 pub content_transfer_encoding: ContentTransferEncoding,
38 pub sender: String,
40 pub body_text: String,
41 pub body_html: String,
42 pub files: JsonValue,
43 pub charset: String,
44}
45
46impl AnalyzeEmails {
47 pub fn new(mut data: Vec<u8>, debug: bool) -> io::Result<AnalyzeEmails> {
48 let md5 = u8_to_md5(data.clone()).leak().to_string();
49 let size = data.len();
50 let data_string = unsafe { String::from_utf8_unchecked(data.clone()) };
51 if data_string.contains("\n\n") {
52 let updated_string = data_string.replace("\n", "\r\n");
53 data = updated_string.as_bytes().to_vec();
54 }
55
56 let subsequence = "\r\n\r\n".as_bytes();
57
58 let (header, body) = match data
59 .windows(subsequence.len())
60 .position(|window| window == subsequence)
61 {
62 None => {
63 if debug {
64 fs::write(
65 format!(
66 "{}/xygs-{}.eml",
67 env::current_dir().unwrap().to_str().unwrap(),
68 md5
69 ),
70 data.clone(),
71 )?;
72 }
73 return Err(Error::new(ErrorKind::Other, format!("协议格式错误: {md5}")));
74 }
75 Some(e) => (data[..e].to_vec(), data[e + 4..].to_vec()),
76 };
77 let mut that = Self {
78 debug,
79 header: Default::default(),
80 mime_version: "".to_string(),
81 boundary: "".to_string(),
82 md5,
83 size,
84 timestamp: 0,
85 subject: "".to_string(),
86 from: Default::default(),
87 to: Default::default(),
88 cc: Default::default(),
89 replyto: Default::default(),
90 datetime: "".to_string(),
91 content_type: "".to_string(),
92 content_transfer_encoding: ContentTransferEncoding::Bit7,
93 sender: "".to_string(),
94 body_text: "".to_string(),
95 body_html: "".to_string(),
96 files: object! {},
97 charset: "utf-8".to_string(),
98 };
99 that.header(header)?;
100 that.body(body, data_string)?;
101 Ok(that)
102 }
103
104 fn header(&mut self, data: Vec<u8>) -> io::Result<()> {
105 let data = unsafe { String::from_utf8_unchecked(data) };
106 let data = data.replace("\r\n\t", "").replace("\r\n ", " ").leak();
107 for item in data.lines() {
108 let (key, value) = match item.find(": ") {
109 Some(e) => (&item[..e], &item[e + 2..]),
110 None => match item.find(":") {
111 Some(e) => (&item[..e], &item[e + 1..]),
112 None => continue,
113 },
114 };
115 let name = key.to_lowercase().leak();
116 if value.is_empty() {
117 continue;
118 }
119 match key.to_lowercase().as_str() {
120 "mime-version" => self.mime_version = value.to_string(),
121 "from" => {
122 self.from = self.from(value);
123 }
124 "sender" => {
125 self.sender = value.to_string();
126 }
127 "to" => {
128 self.to = self.email_encoded(value);
129 }
130 "cc" => {
131 self.cc = self.email_encoded(value);
132 }
133 "reply-to" => {
134 self.replyto = self.email_encoded(value);
135 }
136 "subject" => {
137 self.subject = self.subject(value.to_string());
138 }
139 "content-type" => {
140 let types = value.split(";").collect::<Vec<&str>>();
141 self.content_type = types[0].trim().to_lowercase().to_string();
142 match self.content_type.as_str() {
143 "multipart/mixed"
144 | "multipart/alternative"
145 | "multipart/related"
146 | "multipart/report" => match types[1].find("boundary=") {
147 None => {}
148 Some(e) => {
149 let boundary = &types[1][e..];
150 self.boundary = boundary
151 .trim()
152 .trim_start_matches("boundary=")
153 .trim_start_matches("\"")
154 .trim_end_matches("\"")
155 .to_string();
156 }
157 },
158 _ => {}
159 }
160 if types.len() > 1 {
161 for item in types.iter() {
162 if item.contains("charset=") {
163 self.charset = item
164 .trim_start_matches("charset=")
165 .trim_start_matches("\"")
166 .trim_end_matches("\"")
167 .to_string();
168 }
169 }
170 }
171 }
172 "content-transfer-encoding" => {
173 self.content_transfer_encoding = ContentTransferEncoding::from(value);
174 }
175 "date" => self.datetime(value)?,
176 _ => {
177 self.header
178 .insert(name.trim().to_string(), value.to_string());
179 }
180 }
181 }
182 Ok(())
183 }
184 fn body(&mut self, data: Vec<u8>, old_data: String) -> io::Result<()> {
185 match self.content_type.to_lowercase().as_str() {
186 "text/html" => {
187 let data = self.content_transfer_encoding.decode(data)?;
188 let res = code_to_utf8(self.charset.as_str(), data.clone());
189 self.body_html = res;
190 }
191 "text/plain" => {
192 let data = self.content_transfer_encoding.decode(data)?;
193 let res = code_to_utf8(self.charset.as_str(), data.clone());
194 self.body_text = res;
195 }
196 "multipart/mixed"
197 | "multipart/alternative"
198 | "multipart/related"
199 | "multipart/report" => {
200 let data = self.content_transfer_encoding.decode(data.clone())?;
201 let mut parts = code_to_utf8(self.charset.as_str(), data.clone());
202 let mut parts_list = vec![];
203 let mut text = String::new();
204
205 parts = match parts.find(self.boundary.as_str()) {
206 None => parts,
207 Some(e) => parts[e..].to_string(),
208 };
209 for item in parts.lines() {
210 if item.contains(self.boundary.as_str()) && text.is_empty() {
211 continue;
212 }
213 if item.contains(self.boundary.as_str()) && text.clone() != "" {
214 parts_list.push(text.clone());
215 text = String::new();
216 continue;
217 }
218 text = format!("{}{}\r\n", text, item);
219 }
220 for part in parts_list {
221 if part.trim().is_empty() {
222 continue;
223 }
224 self.parts(part.to_string(), old_data.clone())?;
225 }
226 }
227 _ => {
228 return Err(Error::new(
229 ErrorKind::NotFound,
230 format!("未知body类型: {}", self.content_type),
231 ));
232 }
233 }
234 Ok(())
235 }
236 fn parts(&mut self, data: String, old_data: String) -> io::Result<()> {
238 let (header, body) = match data.find("\r\n\r\n") {
239 None => {
240 if self.debug {
241 fs::write(
242 format!(
243 "{}/head-{}.eml",
244 env::current_dir().unwrap().to_str().unwrap(),
245 self.md5
246 ),
247 old_data.clone(),
248 )?;
249 }
250 return Err(Error::new(ErrorKind::Other, "解析附件头失败"));
251 }
252 Some(e) => (
253 &data[..e]
254 .replace("\r\n\t", " ")
255 .replace("\r\n ", " ")
256 .leak()
257 .lines(),
258 &data[e + 4..],
259 ),
260 };
261
262 let mut filename = "".to_string();
263 let mut content_type = "";
264 let mut boundary = "";
265 let mut content_transfer_encoding = ContentTransferEncoding::None;
266 for item in header.clone() {
267 let (key, value) = match item.find(": ") {
268 Some(e) => (&item[..e], &item[e + 2..]),
269 None => match item.find(":") {
270 Some(e) => (&item[..e], &item[e + 1..]),
271 None => continue,
272 },
273 };
274
275 let name = key.to_lowercase();
276
277 match name.trim() {
278 "content-transfer-encoding" => {
279 content_transfer_encoding = ContentTransferEncoding::from(value)
280 }
281 "content-type" => {
282 let types = value.trim().split(";").collect::<Vec<&str>>();
283 content_type = types[0].trim();
284 let name = types
285 .iter()
286 .filter(|&x| x.trim().starts_with("name="))
287 .map(|&x| x.trim().to_string())
288 .collect::<Vec<String>>();
289 if !name.is_empty() {
290 let name = name[0].trim_start_matches("name=");
291 filename = self.encoded(name);
292 }
293 match value.find("boundary=") {
294 None => {}
295 Some(i) => {
296 boundary = &value[i + 9..];
297 boundary = match boundary.find(";") {
298 None => boundary,
299 Some(i) => &boundary[..i],
300 };
301 boundary = boundary.trim_start_matches("\"").trim_end_matches("\"");
302 }
303 }
304 }
305 "content-id"
306 | "content-length"
307 | "mime-version"
308 | "content-description"
309 | "date"
310 | "x-attachment-id" => {}
311 "content-disposition" => {
312 if filename.is_empty() && value.contains("filename=") {
313 filename = value.split("filename=").collect::<Vec<&str>>()[1]
314 .trim_start_matches("\"")
315 .trim_end_matches("\"")
316 .to_string();
317 }
318 if filename.is_empty() && value.contains("filename*=utf-8''") {
319 filename = value.split("filename*=utf-8''").collect::<Vec<&str>>()[1]
320 .trim_start_matches("\"")
321 .trim_end_matches("\"")
322 .to_string();
323 filename = br_crypto::encoding::urlencoding_decode(filename.as_str());
324 }
325 }
326 _ => {
327 return Err(Error::new(
328 ErrorKind::NotFound,
329 format!("parts 未知 header 类型: {} [{}]", name, item),
330 ));
331 }
332 }
333 }
334
335 match content_type {
336 "text/plain" => {
337 if filename.is_empty() {
338 let res = content_transfer_encoding.decode(body.as_bytes().to_vec())?;
339 let text = code_to_utf8(self.charset.as_str(), res.clone());
340 self.body_text = text;
341 } else {
342 self.set_files(content_transfer_encoding, body, filename.as_str(), "".to_string())?;
343 }
344 }
345 "text/html" | "text/x-amp-html" => {
346 if filename.is_empty() {
347 let res = content_transfer_encoding.decode(body.as_bytes().to_vec())?;
348 self.body_html = code_to_utf8(self.charset.as_str(), res.clone());
349 } else {
350 self.set_files(content_transfer_encoding, body, filename.as_str(), "".to_string())?;
351 }
352 }
353 "multipart/mixed" | "multipart/alternative" | "multipart/related" => {
354 let data = self
355 .content_transfer_encoding
356 .decode(body.as_bytes().to_vec())?;
357 let mut parts = code_to_utf8(self.charset.as_str(), data.clone());
358
359 parts = match parts.find(self.boundary.as_str()) {
360 None => parts,
361 Some(e) => parts[e..].to_string(),
362 };
363
364 let mut parts_list = vec![];
365 let mut text = String::new();
366 for item in parts.lines() {
367 if item.contains(boundary) && text.is_empty() {
368 continue;
369 }
370 if item.contains(boundary) && !text.is_empty() {
371 parts_list.push(text);
372 text = String::new();
373 continue;
374 }
375 text = format!("{}{}\r\n", text, item);
376 }
377 for part in parts_list {
378 if part.trim().is_empty() {
379 continue;
380 }
381 self.parts(part.to_string(), old_data.clone())?;
382 }
383 }
384 "text/calendar" => {}
385 "application/octet-stream"
386 | "application/zip"
387 | "application/pdf"
388 | "image/jpeg"
389 | "application/ics"
390 | "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => {
391 if !filename.is_empty() {
392 self.set_files(
393 content_transfer_encoding,
394 body,
395 filename.as_str(),
396 content_type.to_string(),
397 )?;
398 }
399 }
400 _ => {
401 if self.debug {
402 fs::write(
403 format!(
404 "{}/content_type-{}.eml",
405 env::current_dir().unwrap().to_str().unwrap(),
406 self.md5
407 ),
408 old_data.clone(),
409 )?;
410 }
411 return Err(Error::new(
412 ErrorKind::NotFound,
413 format!("未知 parts content_type 类型: {}", content_type),
414 ));
415 }
416 }
417 Ok(())
418 }
419 pub fn from(&mut self, value: &str) -> HashMap<String, String> {
420 let mut r = value
421 .split("<")
422 .filter(|x| !x.trim().is_empty())
423 .map(|x| x.trim())
424 .collect::<Vec<&str>>();
425 if r[0].starts_with("\"") && r[0].ends_with("\"") {
426 r[0] = r[0].trim_start_matches("\"").trim_end_matches("\"").trim();
427 }
428 let mut emails = HashMap::new();
429 if r.len() == 1 {
430 let name = r[0].trim_end_matches(">").to_string();
431 emails.insert(name.clone(), name);
432 } else {
433 let name = self.encoded(r[0].trim());
434 let email = r[1].trim_end_matches(">").to_string();
435 emails.insert(email, name);
436 }
437 emails
438 }
439 fn subject(&mut self, value: String) -> String {
440 let value = value.replace("?==?", "?=\r\n\t=?");
441 if !value.contains("=?") && !value.contains("?=") {
442 return value.to_string();
443 }
444 let list = value.split("\r\n\t").collect::<Vec<&str>>();
445 let mut txt = vec![];
446 for item in list {
447 txt.push(self.encoded(item));
448 }
449 txt.join("")
450 }
451
452 fn encoded(&mut self, value: &str) -> String {
453 let t = value.trim_start_matches("\"").trim_end_matches("\"");
454 if t.contains("=?") && t.contains("?=") {
455 let l = t.split(" ").collect::<Vec<&str>>();
456 let mut txt = vec![];
457 for item in l {
458 txt.push(self.encoded_line(item));
459 }
460 txt.join("")
461 } else {
462 t.to_string()
463 }
464 }
465 fn encoded_line(&mut self, value: &str) -> String {
467 let line = value.split("?").collect::<Vec<&str>>();
468 if line.len() == 1 {
469 return value.to_string();
470 }
471 let charset = line[1].to_lowercase().to_string().leak();
472 let code = line[2].to_uppercase();
473 let data = line[3];
474
475 let strs = match code.as_str() {
476 "B" => br_crypto::base64::decode_u8(data),
477 "Q" => br_crypto::qp::decode(data).unwrap_or(vec![]),
478 _ => data.as_bytes().to_vec(),
479 };
480 let text = code_to_utf8(charset, strs.clone());
481 text.chars().filter(|&x| x != '\u{200b}').collect()
482 }
483
484 fn datetime(&mut self, value: &str) -> io::Result<()> {
486 let re = Regex::new(r"\s*\(.*\)$").unwrap();
487 let datetime = re.replace(value, "").to_string();
488 let datetime = datetime.replace("GMT", "+0000").to_string();
489 let datetime = match datetime.find(",") {
490 None => datetime,
491 Some(i) => datetime[i + 1..].parse().unwrap(),
492 };
493 let datetime = match DateTime::parse_from_str(datetime.as_str(), "%d %b %Y %H:%M:%S %z") {
494 Ok(e) => e,
495 Err(e) => {
496 return Err(Error::new(
497 ErrorKind::Other,
498 format!("时间解析失败: {e} [{datetime:?}]"),
499 ))
500 }
501 };
502 self.timestamp = datetime.timestamp();
503 self.datetime = Local
504 .timestamp_opt(self.timestamp, 0)
505 .unwrap()
506 .with_timezone(&Local)
507 .format("%Y-%m-%d %H:%M:%S")
508 .to_string();
509 Ok(())
510 }
511 pub fn email_encoded(&mut self, value: &str) -> HashMap<String, String> {
512 let list = value.split(",").map(|x| x.trim()).collect::<Vec<&str>>();
513 let mut emails = HashMap::new();
514 for item in list {
515 let mut r = item.split(" <").collect::<Vec<&str>>();
516 if r[0].starts_with("\"") && r[0].ends_with("\"") {
517 r[0] = r[0].trim_start_matches("\"").trim_end_matches("\"");
518 }
519 if r.len() == 1 {
520 let name = r[0]
521 .trim_start_matches("<")
522 .trim_end_matches(">")
523 .to_string();
524 emails.insert(name.clone(), name);
525 } else {
526 let name = self.encoded(r[0].trim());
527 let email = r[1].trim_end_matches(">").to_string();
528 emails.insert(email, name);
529 }
530 }
531 emails
532 }
533 fn set_files(
534 &mut self,
535 mut content_transfer_encoding: ContentTransferEncoding,
536 body: &str,
537 filename: &str,
538 mut content_type: String,
539 ) -> io::Result<()> {
540 let mut data = "";
541 if let ContentTransferEncoding::Base64 = content_transfer_encoding {
542 let mut text = "".to_string();
543 for line in body.lines() {
544 text += line;
545 }
546 data = text.leak();
547 }
548
549 let body = content_transfer_encoding.decode(data.as_bytes().to_vec())?;
550 let md5 = u8_to_md5(body.clone());
551 let size = body.len();
552 let mut temp_dir = env::temp_dir();
553 temp_dir.push(filename);
554 let path_temp_dir=temp_dir.clone();
555
556 let mut temp_file = match fs::File::create(temp_dir.clone()) {
557 Ok(e) => e,
558 Err(e) => {
559 return Err(Error::new(
560 ErrorKind::Other,
561 format!("打开(创建)临时文件: {} [{}]", e, filename),
562 ))
563 }
564 };
565
566 if temp_file.write(body.as_slice()).is_ok() {
567 if content_type.is_empty() {
568 content_type = path_temp_dir.extension()
569 .unwrap_or(OsStr::new("unknown"))
570 .to_str()
571 .unwrap_or("unknown").to_string();
572 }
573
574 self.files[md5.as_str()] = object! {
575 name:filename,
576 md5:md5.clone(),
577 size:size,
578 "content-type":content_type.clone(),
579 file:temp_dir.to_str()
580 };
581 };
582 Ok(())
583 }
584}
585
586impl Default for AnalyzeEmails {
587 fn default() -> Self {
588 Self {
589 debug: false,
590 header: Default::default(),
591 mime_version: "".to_string(),
592 boundary: "".to_string(),
593 md5: "".to_string(),
594 size: 0,
595 timestamp: 0,
596 datetime: "".to_string(),
597 subject: "".to_string(),
598 from: Default::default(),
599 to: Default::default(),
600 cc: Default::default(),
601 replyto: Default::default(),
602 content_type: "".to_string(),
603 content_transfer_encoding: ContentTransferEncoding::None,
604 sender: "".to_string(),
605 body_text: "".to_string(),
606 body_html: "".to_string(),
607 files: JsonValue::Null,
608 charset: "".to_string(),
609 }
610 }
611}
612
/// Value of a `Content-Transfer-Encoding` header.
///
/// The type is a plain tag without data, so it is `Copy` and comparable; callers can
/// test the public `content_transfer_encoding` field with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ContentTransferEncoding {
    /// `quoted-printable`
    QuotedPrintable,
    /// `base64`
    Base64,
    /// `binary`
    Binary,
    /// `8bit`
    Bit8,
    /// `7bit`
    Bit7,
    /// Header absent or value not recognised; data is passed through unchanged.
    None,
}
638
639impl ContentTransferEncoding {
640 fn from(value: &str) -> Self {
641 match value.to_lowercase().as_str() {
642 "7bit" => Self::Bit7,
643 "8bit" => Self::Bit8,
644 "binary" => Self::Binary,
645 "base64" => Self::Base64,
646 "quoted-printable" => Self::QuotedPrintable,
647 _ => Self::None,
648 }
649 }
650 fn decode(&mut self, mut data: Vec<u8>) -> io::Result<Vec<u8>> {
651 let res = match self {
652 ContentTransferEncoding::QuotedPrintable => br_crypto::qp::decode(data)?,
653 ContentTransferEncoding::Base64 => {
654 let str = unsafe { String::from_utf8_unchecked(data) };
655 let mut text = "".to_string();
656 for line in str.lines() {
657 text += line;
658 }
659 data = text.leak().as_bytes().to_vec();
660 br_crypto::base64::decode_u8(data)
661 }
662 ContentTransferEncoding::Binary => data,
663 ContentTransferEncoding::Bit8 => data,
664 ContentTransferEncoding::Bit7 => data,
665 ContentTransferEncoding::None => data,
666 };
667 Ok(res)
668 }
669}