1use std::{
2 fmt::Debug,
3 io::{Cursor, Read, Seek},
4 path::{Path, PathBuf},
5};
6
7use cfb::CompoundFile;
8
9use chrono::{DateTime, Utc};
10use compressed_rtf::decompress_rtf;
11use thiserror::Error;
12
/// Errors that can occur while reading a message from a compound file.
#[derive(Error, Debug)]
pub enum MsgError {
    /// An underlying I/O operation failed; converted automatically from
    /// `std::io::Error` via `?`.
    #[error("IO error: {0}")]
    IO(#[from] std::io::Error),
    /// A formatting operation failed; converted automatically from
    /// `std::fmt::Error` via `?`.
    #[error("Format error: {0}")]
    Fmt(#[from] std::fmt::Error),
    /// Stored data could not be decoded or parsed. In this file it is used for
    /// UTF-16/UTF-8 decoding failures, RTF decompression failures and
    /// unparsable or missing `Date` headers.
    #[error("Encoding error")]
    Encoding,
    /// A failure with no more specific classification.
    #[error("Unknown error")]
    Unknown,
}
24
/// Module-local shorthand for results whose error type is [`MsgError`].
type Result<S> = std::result::Result<S, MsgError>;
26
/// Reads individual message properties out of a compound file.
///
/// The reader borrows the compound file mutably for `'c` and resolves every
/// stream/storage name relative to `path`, the storage that holds the message
/// (the root for a top-level message, a nested storage for an embedded one).
pub struct MsgReader<'c, 'p, F> {
    /// The open compound file the properties are read from.
    inner: &'c mut CompoundFile<F>,
    /// Storage path of the message inside `inner`.
    path: &'p Path,
}
32
/// A file attached to an [`Email`]: its name and raw contents.
#[derive(Clone)]
pub struct Attachment {
    /// File name of the attachment.
    pub name: String,
    /// Raw bytes of the attachment.
    pub data: Vec<u8>,
}

/// Hand-written `Debug` so that only the payload *size* is printed instead of
/// dumping every byte of `data`.
impl Debug for Attachment {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let payload_len = self.data.len();
        let mut builder = formatter.debug_struct("Attachment");
        builder.field("name", &self.name);
        builder.field("data of size", &payload_len);
        builder.finish()
    }
}
47
/// A parsed message together with its attachments and embedded messages.
///
/// Every field is filled on a best-effort basis: a property that cannot be
/// read leaves the corresponding field `None` or empty.
#[derive(Debug, Clone)]
pub struct Email {
    /// Sender as a `(name, address)` pair.
    pub from: Option<(String, String)>,
    /// Sent date, parsed from the `Date` line of the transport headers and
    /// converted to UTC.
    pub sent_date: Option<chrono::DateTime<Utc>>,
    /// "To" recipients as `(name, address)` pairs.
    pub to: Vec<(String, String)>,
    /// "Cc" recipients as `(name, address)` pairs.
    pub cc: Vec<(String, String)>,
    /// "Bcc" recipients as `(name, address)` pairs.
    pub bcc: Vec<(String, String)>,
    /// Subject line.
    pub subject: Option<String>,
    /// Message body: the HTML body when readable, otherwise the decompressed
    /// RTF body.
    pub body: Option<String>,
    /// Attachments that carry a binary payload.
    pub attachments: Vec<Attachment>,
    /// Messages attached as nested storages, parsed recursively.
    pub embedded_messages: Vec<Email>,
}
61
62impl Email {
63 pub fn from_path<P: AsRef<Path>>(file: P) -> Self {
64 Self::from_path_internal(file.as_ref(), Path::new("/"))
65 }
66 pub fn from_bytes<B: AsRef<[u8]>>(bytes: B) -> Self {
67 Self::from_bytes_internal(bytes.as_ref(), Path::new("/"))
68 }
69
70 fn from_path_internal(file: &Path, subpath: &Path) -> Self {
71 let mut comp = cfb::open(file).unwrap();
72 let mut reader = MsgReader::new(&mut comp, subpath);
73 let from = reader.from().ok();
74 let sent_date = reader.sent_date().ok();
75 let to = reader.to().unwrap_or_default();
76 let cc = reader.cc().unwrap_or_default();
77 let bcc = reader.bcc().unwrap_or_default();
78 let subject = reader.pr_subject().ok();
79 let body = reader.body().ok();
80 let attachments = reader.attachments().unwrap_or_default();
81 let emb_paths = reader.embedded_messages().unwrap();
82 let embedded_messages: Vec<_> = emb_paths
83 .into_iter()
84 .map(|emb_path| Self::from_path_internal(file, &emb_path))
85 .collect();
86 Self {
87 from,
88 sent_date,
89 to,
90 cc,
91 bcc,
92 subject,
93 body,
94 attachments,
95 embedded_messages,
96 }
97 }
98 fn from_bytes_internal(bytes: &[u8], subpath: &Path) -> Self {
99 let cur = Cursor::new(bytes);
100 let mut comp = CompoundFile::open(cur).unwrap();
101 let mut reader = MsgReader::new(&mut comp, subpath);
102 let from = reader.from().ok();
103 let sent_date = reader.sent_date().ok();
104 let to = reader.to().unwrap_or_default();
105 let cc = reader.cc().unwrap_or_default();
106 let bcc = reader.bcc().unwrap_or_default();
107 let subject = reader.pr_subject().ok();
108 let body = reader.body().ok();
109 let attachments = reader.attachments().unwrap_or_default();
110 let emb_paths = reader.embedded_messages().unwrap();
111 let embedded_messages: Vec<_> = emb_paths
112 .into_iter()
113 .map(|emb_path| Self::from_bytes_internal(bytes, &emb_path))
114 .collect();
115 Self {
116 from,
117 sent_date,
118 to,
119 cc,
120 bcc,
121 subject,
122 body,
123 attachments,
124 embedded_messages,
125 }
126 }
127}
128
129impl<'c, 'p, F> MsgReader<'c, 'p, F>
130where
131 F: Read + Seek,
132{
133 pub fn new(inner: &'c mut CompoundFile<F>, path: &'p Path) -> Self {
134 Self { inner, path }
135 }
136
137 fn read_simple_string(&mut self, prop: &str) -> Result<String> {
138 let mut content = self
139 .inner
140 .open_stream(self.path.join(format!("__substg1.0_{prop}001F")))?;
141 let mut buf = vec![];
142 content.read_to_end(&mut buf).unwrap();
143 String::from_utf16(&pack_u8s_to_u16s_le_padded(&buf))
144 .map_err(|_e| MsgError::Encoding)
145 .map(|x| x.trim_end_matches('\0').to_string())
146 }
147 fn read_simple_binary(&mut self, prop: &str) -> Result<Vec<u8>> {
148 let mut content = self
149 .inner
150 .open_stream(self.path.join(format!("__substg1.0_{prop}0102")))?;
151 let mut buf = vec![];
152 content.read_to_end(&mut buf).unwrap();
153 Ok(buf)
154 }
155 pub fn read_path_as_binary(&mut self, subpath: &Path) -> Result<Vec<u8>> {
156 let mut content = self.inner.open_stream(self.path.join(subpath))?;
157 let mut buf = vec![];
158 content.read_to_end(&mut buf).unwrap();
159 Ok(buf)
160 }
161 pub fn read_path_as_string(&mut self, subpath: &Path) -> Result<String> {
162 let mut content = self.inner.open_stream(self.path.join(subpath))?;
163 let mut buf = vec![];
164 content.read_to_end(&mut buf).unwrap();
165 String::from_utf16(&pack_u8s_to_u16s_le_padded(&buf))
166 .map_err(|_e| MsgError::Encoding)
167 .map(|x| x.trim_end_matches('\0').to_string())
168 }
169 pub fn pr_subject(&mut self) -> Result<String> {
170 self.read_simple_string("0037") }
172 pub fn pr_sender_name(&mut self) -> Result<String> {
173 self.read_simple_string("0C1A")
174 }
175 pub fn pr_sender_email_adress_str(&mut self) -> Result<String> {
176 self.read_simple_string("0C19")
177 }
178 pub fn pr_smtp_sender_address(&mut self) -> Result<String> {
179 self.read_simple_string("5D01")
180 }
181 pub fn pr_smtp_address(&mut self) -> Result<String> {
182 self.read_simple_string("39FE")
183 }
184 pub fn sender_address(&mut self) -> Result<String> {
185 self.pr_sender_email_adress_str()
186 .or_else(|_| self.pr_smtp_address())
187 .or_else(|_| self.pr_smtp_sender_address())
188 }
189 pub fn from(&mut self) -> Result<(String, String)> {
190 Ok((self.pr_sender_name()?, self.sender_address()?))
191 }
192 pub fn pr_transport_message_headers(&mut self) -> Result<String> {
193 self.read_simple_string("007D")
194 }
195 pub fn pr_body_html(&mut self) -> Result<String> {
196 let bin = self.read_simple_binary("1013")?;
197 String::from_utf8(bin).map_err(|_| MsgError::Encoding)
198 }
199 pub fn pr_rtf_compressed(&mut self) -> Result<Vec<u8>> {
200 self.read_simple_binary("1009")
201 }
202 fn rtf(&mut self) -> Result<String> {
203 self.pr_rtf_compressed()
204 .and_then(|comp_rtf| decompress_rtf(&comp_rtf).map_err(|_| MsgError::Encoding))
205 }
206 pub fn body(&mut self) -> Result<String> {
207 self.pr_body_html().or_else(|_| self.rtf())
208 }
209 pub fn sent_date(&mut self) -> Result<DateTime<Utc>> {
210 let headers = self.pr_transport_message_headers()?;
211 let dateline = headers
212 .lines()
213 .find(|x| x.starts_with("Date"))
214 .ok_or(MsgError::Encoding)?
215 .split_once(": ")
216 .ok_or(MsgError::Encoding)?
217 .1;
218 chrono::DateTime::parse_from_rfc2822(dateline)
219 .map_err(|_| MsgError::Encoding)
220 .map(|d| d.with_timezone(&Utc))
221 }
222 fn recipients(&mut self) -> Result<Vec<(String, String)>> {
223 let recip_paths: Vec<_> = self
224 .inner
225 .read_storage(self.path)?
226 .filter(|x| x.name().starts_with("__recip_version1.0_"))
227 .map(|r| r.path().to_owned())
228 .collect();
229 recip_paths
230 .iter()
231 .map(|r| {
232 let name = self.read_path_as_string(&r.join("__substg1.0_3001001F"))?;
233 let address = self.read_path_as_string(&r.join("__substg1.0_39FE001F"))?;
234 Ok((name, address))
235 })
236 .collect()
237 }
238 pub fn to(&mut self) -> Result<Vec<(String, String)>> {
239 let to_field = self.read_simple_string("0E04")?;
240 let to_list: Vec<_> = to_field.split(";").map(|n| n.trim()).collect();
241 let output: Vec<(String, String)> = self
242 .recipients()?
243 .into_iter()
244 .filter(|(k, _v)| to_list.contains(&&k[..]))
245 .collect();
246 Ok(output)
247 }
248 pub fn cc(&mut self) -> Result<Vec<(String, String)>> {
249 let cc_field = self.read_simple_string("0E03")?;
250 let cc_list: Vec<_> = cc_field.split(";").map(|n| n.trim()).collect();
251 let output: Vec<(String, String)> = self
252 .recipients()?
253 .into_iter()
254 .filter(|(k, _v)| cc_list.contains(&&k[..]))
255 .collect();
256 Ok(output)
257 }
258 pub fn bcc(&mut self) -> Result<Vec<(String, String)>> {
259 let bcc_field = self.read_simple_string("0E02")?;
260 let bcc_list: Vec<_> = bcc_field.split(";").map(|n| n.trim()).collect();
261 let output: Vec<(String, String)> = self
262 .recipients()?
263 .into_iter()
264 .filter(|(k, _v)| bcc_list.contains(&&k[..]))
265 .collect();
266 Ok(output)
267 }
268 pub fn attachments(&mut self) -> Result<Vec<Attachment>> {
269 let attachment_paths: Vec<_> = self
270 .inner
271 .read_storage(self.path)?
272 .filter(|x| x.name().starts_with("__attach_version1.0_"))
273 .map(|r| r.path().to_owned())
274 .collect();
275 let res = attachment_paths
276 .iter()
277 .flat_map(|a| {
278 let name = self
279 .read_path_as_string(&a.join("__substg1.0_3704001F"))
280 .or_else(|_| self.read_path_as_string(&a.join("__substg1.0_3001001F")))?;
281 let data = self.read_path_as_binary(&a.join("__substg1.0_37010102"))?;
282 let output: Result<Attachment> = Ok(Attachment { name, data });
283 output
284 })
285 .collect();
286 Ok(res)
287 }
288 pub fn embedded_messages(&mut self) -> Result<Vec<PathBuf>> {
289 let attachment_paths: Vec<_> = self
290 .inner
291 .read_storage(self.path)?
292 .filter(|x| x.name().starts_with("__attach_version1.0_"))
293 .map(|r| r.path().to_owned())
294 .collect();
295 let res = attachment_paths
296 .into_iter()
297 .map(|a| a.join("__substg1.0_3701000D"))
298 .filter(|a| self.inner.is_storage(a))
299 .collect();
300 Ok(res)
301 }
302}
303
/// Packs `bytes` into little-endian `u16` code units.
///
/// Bytes are consumed two at a time, low byte first. If the input has an odd
/// length, the final lone byte is paired with a `0x00` high byte.
fn pack_u8s_to_u16s_le_padded(bytes: &[u8]) -> Vec<u16> {
    bytes
        .chunks(2)
        .map(|pair| {
            // `chunks(2)` never yields an empty slice, so `pair[0]` exists;
            // only the last chunk can be missing its second byte.
            let low = pair[0];
            let high = pair.get(1).copied().unwrap_or(0x00);
            u16::from_le_bytes([low, high])
        })
        .collect()
}