1use br_crypto::encoding::code_to_utf8;
2use chrono::{DateTime, Local, TimeZone};
3use json::{object, JsonValue};
4use regex::Regex;
5use std::collections::HashMap;
6use std::ffi::OsStr;
7use std::io::{Error, ErrorKind, Write};
8use std::{env, fs, io};
9
10#[derive(Debug)]
12pub struct AnalyzeEmails {
13 pub debug: bool,
14 pub header: HashMap<String, String>,
15 pub mime_version: String,
16 boundary: String,
17 pub md5: String,
18 pub size: usize,
19 pub timestamp: i64,
21 pub datetime: String,
23 pub subject: String,
25 pub from: HashMap<String, String>,
27 pub to: HashMap<String, String>,
29 pub cc: HashMap<String, String>,
31 pub replyto: HashMap<String, String>,
33 pub content_type: String,
35 pub content_transfer_encoding: ContentTransferEncoding,
37 pub sender: String,
39 pub body_text: String,
40 pub body_html: String,
41 pub files: JsonValue,
42 pub charset: String,
43}
44
45impl AnalyzeEmails {
46 pub fn new(mut data: Vec<u8>, debug: bool) -> io::Result<AnalyzeEmails> {
47 let md5 = br_crypto::md5::encrypt_hex(&data.clone()).leak().to_string();
48 let size = data.len();
49 let data_string = unsafe { String::from_utf8_unchecked(data.clone()) };
50 if data_string.contains("\n\n") {
51 let updated_string = data_string.replace("\n", "\r\n");
52 data = updated_string.as_bytes().to_vec();
53 }
54
55 let subsequence = "\r\n\r\n".as_bytes();
56
57 let (header, body) = match data
58 .windows(subsequence.len())
59 .position(|window| window == subsequence)
60 {
61 None => {
62 if debug {
63 fs::write(
64 format!(
65 "{}/xygs-{}.eml",
66 env::current_dir().unwrap().to_str().unwrap(),
67 md5
68 ),
69 data.clone(),
70 )?;
71 }
72 return Err(Error::other(format!("协议格式错误: {md5}")));
73 }
74 Some(e) => (data[..e].to_vec(), data[e + 4..].to_vec()),
75 };
76 let mut that = Self {
77 debug,
78 header: Default::default(),
79 mime_version: "".to_string(),
80 boundary: "".to_string(),
81 md5,
82 size,
83 timestamp: 0,
84 subject: "".to_string(),
85 from: Default::default(),
86 to: Default::default(),
87 cc: Default::default(),
88 replyto: Default::default(),
89 datetime: "".to_string(),
90 content_type: "".to_string(),
91 content_transfer_encoding: ContentTransferEncoding::Bit7,
92 sender: "".to_string(),
93 body_text: "".to_string(),
94 body_html: "".to_string(),
95 files: object! {},
96 charset: "utf-8".to_string(),
97 };
98 that.header(header)?;
99 that.body(body, data_string)?;
100 Ok(that)
101 }
102
103 fn header(&mut self, data: Vec<u8>) -> io::Result<()> {
104 let data = unsafe { String::from_utf8_unchecked(data) };
105 let data = data.replace("\r\n\t", "").replace("\r\n ", " ").leak();
106 for item in data.lines() {
107 let (key, value) = match item.find(": ") {
108 Some(e) => (&item[..e], &item[e + 2..]),
109 None => match item.find(":") {
110 Some(e) => (&item[..e], &item[e + 1..]),
111 None => continue,
112 },
113 };
114 let name = key.to_lowercase().leak();
115 if value.is_empty() {
116 continue;
117 }
118 match key.to_lowercase().as_str() {
119 "mime-version" => self.mime_version = value.to_string(),
120 "from" => {
121 self.from = self.from(value);
122 }
123 "sender" => {
124 self.sender = value.to_string();
125 }
126 "to" => {
127 self.to = self.email_encoded(value);
128 }
129 "cc" => {
130 self.cc = self.email_encoded(value);
131 }
132 "reply-to" => {
133 self.replyto = self.email_encoded(value);
134 }
135 "subject" => {
136 self.subject = self.subject(value.to_string());
137 }
138 "content-type" => {
139 let types = value.split(";").collect::<Vec<&str>>();
140 self.content_type = types[0].trim().to_lowercase().to_string();
141 match self.content_type.as_str() {
142 "multipart/mixed"
143 | "multipart/alternative"
144 | "multipart/related"
145 | "multipart/report" => match types[1].find("boundary=") {
146 None => {}
147 Some(e) => {
148 let boundary = &types[1][e..];
149 self.boundary = boundary
150 .trim()
151 .trim_start_matches("boundary=")
152 .trim_start_matches("\"")
153 .trim_end_matches("\"")
154 .to_string();
155 }
156 },
157 _ => {}
158 }
159 if types.len() > 1 {
160 for item in types.iter() {
161 if item.contains("charset=") {
162 self.charset = item
163 .trim_start_matches("charset=")
164 .trim_start_matches("\"")
165 .trim_end_matches("\"")
166 .to_string();
167 }
168 }
169 }
170 }
171 "content-transfer-encoding" => {
172 self.content_transfer_encoding = ContentTransferEncoding::from(value);
173 }
174 "date" => self.datetime(value)?,
175 _ => {
176 self.header
177 .insert(name.trim().to_string(), value.to_string());
178 }
179 }
180 }
181 Ok(())
182 }
183 fn body(&mut self, data: Vec<u8>, old_data: String) -> io::Result<()> {
184 match self.content_type.to_lowercase().as_str() {
185 "text/html" => {
186 let data = self.content_transfer_encoding.decode(data)?;
187 let res = code_to_utf8(self.charset.as_str(), data.clone());
188 self.body_html = res;
189 }
190 "text/plain" => {
191 let data = self.content_transfer_encoding.decode(data)?;
192 let res = code_to_utf8(self.charset.as_str(), data.clone());
193 self.body_text = res;
194 }
195 "multipart/mixed"
196 | "multipart/alternative"
197 | "multipart/related"
198 | "multipart/report" => {
199 let data = self.content_transfer_encoding.decode(data.clone())?;
200 let mut parts = code_to_utf8(self.charset.as_str(), data.clone());
201 let mut parts_list = vec![];
202 let mut text = String::new();
203
204 parts = match parts.find(self.boundary.as_str()) {
205 None => parts,
206 Some(e) => parts[e..].to_string(),
207 };
208 for item in parts.lines() {
209 if item.contains(self.boundary.as_str()) && text.is_empty() {
210 continue;
211 }
212 if item.contains(self.boundary.as_str()) && text.clone() != "" {
213 parts_list.push(text.clone());
214 text = String::new();
215 continue;
216 }
217 text = format!("{text}{item}\r\n");
218 }
219 for part in parts_list {
220 if part.trim().is_empty() {
221 continue;
222 }
223 self.parts(part.to_string(), old_data.clone())?;
224 }
225 }
226 _ => {
227 return Err(Error::new(
228 ErrorKind::NotFound,
229 format!("未知body类型: {}", self.content_type),
230 ));
231 }
232 }
233 Ok(())
234 }
235 fn parts(&mut self, data: String, old_data: String) -> io::Result<()> {
237 let (header, body) = match data.find("\r\n\r\n") {
238 None => {
239 if self.debug {
240 fs::write(
241 format!(
242 "{}/head-{}.eml",
243 env::current_dir().unwrap().to_str().unwrap(),
244 self.md5
245 ),
246 old_data.clone(),
247 )?;
248 }
249 return Err(Error::other("解析附件头失败"));
250 }
251 Some(e) => (
252 &data[..e]
253 .replace("\r\n\t", " ")
254 .replace("\r\n ", " ")
255 .leak()
256 .lines(),
257 &data[e + 4..],
258 ),
259 };
260
261 let mut filename = "".to_string();
262 let mut content_type = "";
263 let mut boundary = "";
264 let mut content_transfer_encoding = ContentTransferEncoding::None;
265 for item in header.clone() {
266 let (key, value) = match item.find(": ") {
267 Some(e) => (&item[..e], &item[e + 2..]),
268 None => match item.find(":") {
269 Some(e) => (&item[..e], &item[e + 1..]),
270 None => continue,
271 },
272 };
273
274 let name = key.to_lowercase();
275
276 match name.trim() {
277 "content-transfer-encoding" => {
278 content_transfer_encoding = ContentTransferEncoding::from(value)
279 }
280 "content-type" => {
281 let types = value.trim().split(";").collect::<Vec<&str>>();
282 content_type = types[0].trim();
283 let name = types
284 .iter()
285 .filter(|&x| x.trim().starts_with("name="))
286 .map(|&x| x.trim().to_string())
287 .collect::<Vec<String>>();
288 if !name.is_empty() {
289 let name = name[0].trim_start_matches("name=");
290 filename = self.encoded(name);
291 }
292 match value.find("boundary=") {
293 None => {}
294 Some(i) => {
295 boundary = &value[i + 9..];
296 boundary = match boundary.find(";") {
297 None => boundary,
298 Some(i) => &boundary[..i],
299 };
300 boundary = boundary.trim_start_matches("\"").trim_end_matches("\"");
301 }
302 }
303 }
304 "content-id"
305 | "content-length"
306 | "mime-version"
307 | "content-description"
308 | "date"
309 | "x-attachment-id" => {}
310 "content-disposition" => {
311 if filename.is_empty() && value.contains("filename=") {
312 filename = value.split("filename=").collect::<Vec<&str>>()[1]
313 .trim_start_matches("\"")
314 .trim_end_matches("\"")
315 .to_string();
316 }
317 if filename.is_empty() && value.contains("filename*=utf-8''") {
318 filename = value.split("filename*=utf-8''").collect::<Vec<&str>>()[1]
319 .trim_start_matches("\"")
320 .trim_end_matches("\"")
321 .to_string();
322 filename = br_crypto::encoding::urlencoding_decode(filename.as_str());
323 }
324 }
325 _ => {
326 return Err(Error::new(
327 ErrorKind::NotFound,
328 format!("parts 未知 header 类型: {name} [{item}]"),
329 ));
330 }
331 }
332 }
333
334 match content_type {
335 "text/plain" => {
336 if filename.is_empty() {
337 let res = content_transfer_encoding.decode(body.as_bytes().to_vec())?;
338 let text = code_to_utf8(self.charset.as_str(), res.clone());
339 self.body_text = text;
340 } else {
341 self.set_files(content_transfer_encoding, body, filename.as_str(), "".to_string())?;
342 }
343 }
344 "text/html" | "text/x-amp-html" => {
345 if filename.is_empty() {
346 let res = content_transfer_encoding.decode(body.as_bytes().to_vec())?;
347 self.body_html = code_to_utf8(self.charset.as_str(), res.clone());
348 } else {
349 self.set_files(content_transfer_encoding, body, filename.as_str(), "".to_string())?;
350 }
351 }
352 "multipart/mixed" | "multipart/alternative" | "multipart/related" => {
353 let data = self
354 .content_transfer_encoding
355 .decode(body.as_bytes().to_vec())?;
356 let mut parts = code_to_utf8(self.charset.as_str(), data.clone());
357
358 parts = match parts.find(self.boundary.as_str()) {
359 None => parts,
360 Some(e) => parts[e..].to_string(),
361 };
362
363 let mut parts_list = vec![];
364 let mut text = String::new();
365 for item in parts.lines() {
366 if item.contains(boundary) && text.is_empty() {
367 continue;
368 }
369 if item.contains(boundary) && !text.is_empty() {
370 parts_list.push(text);
371 text = String::new();
372 continue;
373 }
374 text = format!("{text}{item}\r\n");
375 }
376 for part in parts_list {
377 if part.trim().is_empty() {
378 continue;
379 }
380 self.parts(part.to_string(), old_data.clone())?;
381 }
382 }
383 "text/calendar" => {}
384 "application/octet-stream"
385 | "application/zip"
386 | "application/pdf"
387 | "image/jpeg"
388 | "application/ics"
389 | "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => {
390 if !filename.is_empty() {
391 self.set_files(
392 content_transfer_encoding,
393 body,
394 filename.as_str(),
395 content_type.to_string(),
396 )?;
397 }
398 }
399 _ => {
400 if self.debug {
401 fs::write(
402 format!(
403 "{}/content_type-{}.eml",
404 env::current_dir().unwrap().to_str().unwrap(),
405 self.md5
406 ),
407 old_data.clone(),
408 )?;
409 }
410 return Err(Error::new(
411 ErrorKind::NotFound,
412 format!("未知 parts content_type 类型: {content_type}"),
413 ));
414 }
415 }
416 Ok(())
417 }
418 pub fn from(&mut self, value: &str) -> HashMap<String, String> {
419 let mut r = value
420 .split("<")
421 .filter(|x| !x.trim().is_empty())
422 .map(|x| x.trim())
423 .collect::<Vec<&str>>();
424 if r[0].starts_with("\"") && r[0].ends_with("\"") {
425 r[0] = r[0].trim_start_matches("\"").trim_end_matches("\"").trim();
426 }
427 let mut emails = HashMap::new();
428 if r.len() == 1 {
429 let name = r[0].trim_end_matches(">").to_string();
430 emails.insert(name.clone(), name);
431 } else {
432 let name = self.encoded(r[0].trim());
433 let email = r[1].trim_end_matches(">").to_string();
434 emails.insert(email, name);
435 }
436 emails
437 }
438 fn subject(&mut self, value: String) -> String {
439 let value = value.replace("?==?", "?=\r\n\t=?");
440 if !value.contains("=?") && !value.contains("?=") {
441 return value.to_string();
442 }
443 let list = value.split("\r\n\t").collect::<Vec<&str>>();
444 let mut txt = vec![];
445 for item in list {
446 txt.push(self.encoded(item));
447 }
448 txt.join("")
449 }
450
451 fn encoded(&mut self, value: &str) -> String {
452 let t = value.trim_start_matches("\"").trim_end_matches("\"");
453 if t.contains("=?") && t.contains("?=") {
454 let l = t.split(" ").collect::<Vec<&str>>();
455 let mut txt = vec![];
456 for item in l {
457 txt.push(self.encoded_line(item));
458 }
459 txt.join("")
460 } else {
461 t.to_string()
462 }
463 }
464 fn encoded_line(&mut self, value: &str) -> String {
466 let line = value.split("?").collect::<Vec<&str>>();
467 if line.len() == 1 {
468 return value.to_string();
469 }
470 let charset = line[1].to_lowercase().to_string().leak();
471 let code = line[2].to_uppercase();
472 let data = line[3];
473
474 let strs = match code.as_str() {
475 "B" => br_crypto::base64::decode_u8(data),
476 "Q" => br_crypto::qp::decode(data).unwrap_or(vec![]),
477 _ => data.as_bytes().to_vec(),
478 };
479 let text = code_to_utf8(charset, strs.clone());
480 text.chars().filter(|&x| x != '\u{200b}').collect()
481 }
482
483 fn datetime(&mut self, value: &str) -> io::Result<()> {
485 let re = Regex::new(r"\s*\(.*\)$").unwrap();
486 let datetime = re.replace(value, "").to_string();
487 let datetime = datetime.replace("GMT", "+0000").to_string();
488 let datetime = match datetime.find(",") {
489 None => datetime,
490 Some(i) => datetime[i + 1..].parse().unwrap(),
491 };
492 let datetime = match DateTime::parse_from_str(datetime.as_str(), "%d %b %Y %H:%M:%S %z") {
493 Ok(e) => e,
494 Err(e) => {
495 return Err(Error::other(
496 format!("时间解析失败: {e} [{datetime:?}]"),
497 ))
498 }
499 };
500 self.timestamp = datetime.timestamp();
501 self.datetime = Local
502 .timestamp_opt(self.timestamp, 0)
503 .unwrap()
504 .with_timezone(&Local)
505 .format("%Y-%m-%d %H:%M:%S")
506 .to_string();
507 Ok(())
508 }
509 pub fn email_encoded(&mut self, value: &str) -> HashMap<String, String> {
510 let list = value.split(",").map(|x| x.trim()).collect::<Vec<&str>>();
511 let mut emails = HashMap::new();
512 for item in list {
513 let mut r = item.split(" <").collect::<Vec<&str>>();
514 if r[0].starts_with("\"") && r[0].ends_with("\"") {
515 r[0] = r[0].trim_start_matches("\"").trim_end_matches("\"");
516 }
517 if r.len() == 1 {
518 let name = r[0]
519 .trim_start_matches("<")
520 .trim_end_matches(">")
521 .to_string();
522 emails.insert(name.clone(), name);
523 } else {
524 let name = self.encoded(r[0].trim());
525 let email = r[1].trim_end_matches(">").to_string();
526 emails.insert(email, name);
527 }
528 }
529 emails
530 }
531 fn set_files(
532 &mut self,
533 mut content_transfer_encoding: ContentTransferEncoding,
534 body: &str,
535 filename: &str,
536 mut content_type: String,
537 ) -> io::Result<()> {
538 let mut data = "";
539 if let ContentTransferEncoding::Base64 = content_transfer_encoding {
540 let mut text = "".to_string();
541 for line in body.lines() {
542 text += line;
543 }
544 data = text.leak();
545 }
546
547 let body = content_transfer_encoding.decode(data.as_bytes().to_vec())?;
548 let md5 = br_crypto::md5::encrypt_hex(&body.clone());
549 let size = body.len();
550 let mut temp_dir = env::temp_dir();
551 temp_dir.push(filename);
552 let path_temp_dir=temp_dir.clone();
553
554 let mut temp_file = match fs::File::create(temp_dir.clone()) {
555 Ok(e) => e,
556 Err(e) => {
557 return Err(Error::other(
558 format!("打开(创建)临时文件: {e} [{filename}]"),
559 ))
560 }
561 };
562
563 if temp_file.write(body.as_slice()).is_ok() {
564 if content_type.is_empty() {
565 content_type = path_temp_dir.extension()
566 .unwrap_or(OsStr::new("unknown"))
567 .to_str()
568 .unwrap_or("unknown").to_string();
569 }
570
571 self.files[md5.as_str()] = object! {
572 name:filename,
573 md5:md5.clone(),
574 size:size,
575 "content-type":content_type.clone(),
576 file:temp_dir.to_str()
577 };
578 };
579 Ok(())
580 }
581}
582
583impl Default for AnalyzeEmails {
584 fn default() -> Self {
585 Self {
586 debug: false,
587 header: Default::default(),
588 mime_version: "".to_string(),
589 boundary: "".to_string(),
590 md5: "".to_string(),
591 size: 0,
592 timestamp: 0,
593 datetime: "".to_string(),
594 subject: "".to_string(),
595 from: Default::default(),
596 to: Default::default(),
597 cc: Default::default(),
598 replyto: Default::default(),
599 content_type: "".to_string(),
600 content_transfer_encoding: ContentTransferEncoding::None,
601 sender: "".to_string(),
602 body_text: "".to_string(),
603 body_html: "".to_string(),
604 files: JsonValue::Null,
605 charset: "".to_string(),
606 }
607 }
608}
609
/// Value of a `Content-Transfer-Encoding` header.
///
/// The type is a plain tag, so it is cheap to copy and can be compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ContentTransferEncoding {
    /// `quoted-printable`
    QuotedPrintable,
    /// `base64`
    Base64,
    /// `binary`
    Binary,
    /// `8bit`
    Bit8,
    /// `7bit`
    Bit7,
    /// Any other (or no) encoding name; data is passed through unchanged.
    None,
}
635
636impl ContentTransferEncoding {
637 fn from(value: &str) -> Self {
638 match value.to_lowercase().as_str() {
639 "7bit" => Self::Bit7,
640 "8bit" => Self::Bit8,
641 "binary" => Self::Binary,
642 "base64" => Self::Base64,
643 "quoted-printable" => Self::QuotedPrintable,
644 _ => Self::None,
645 }
646 }
647 fn decode(&mut self, mut data: Vec<u8>) -> io::Result<Vec<u8>> {
648 let res = match self {
649 ContentTransferEncoding::QuotedPrintable => br_crypto::qp::decode(data)?,
650 ContentTransferEncoding::Base64 => {
651 let str = unsafe { String::from_utf8_unchecked(data) };
652 let mut text = "".to_string();
653 for line in str.lines() {
654 text += line;
655 }
656 data = text.leak().as_bytes().to_vec();
657 br_crypto::base64::decode_u8(data)
658 }
659 ContentTransferEncoding::Binary => data,
660 ContentTransferEncoding::Bit8 => data,
661 ContentTransferEncoding::Bit7 => data,
662 ContentTransferEncoding::None => data,
663 };
664 Ok(res)
665 }
666}