tiny_msg/
lib.rs

1use std::{
2    fmt::Debug,
3    io::{Cursor, Read, Seek},
4    path::{Path, PathBuf},
5};
6
7use cfb::CompoundFile;
8
9use chrono::{DateTime, Utc};
10use compressed_rtf::decompress_rtf;
11use thiserror::Error;
12
/// Errors produced while reading a .msg file.
#[derive(Error, Debug)]
pub enum MsgError {
    /// An underlying I/O failure; converted automatically from `std::io::Error` via `#[from]`.
    #[error("IO error: {0}")]
    IO(#[from] std::io::Error),
    /// A formatting failure; converted automatically from `std::fmt::Error` via `#[from]`.
    #[error("Format error: {0}")]
    Fmt(#[from] std::fmt::Error),
    /// Data could not be decoded or parsed. In this file it is returned for invalid
    /// UTF-16/UTF-8 text, failed RTF decompression, and a missing or unparsable date header.
    #[error("Encoding error")]
    Encoding,
    /// Catch-all variant.
    // NOTE(review): not constructed anywhere in the visible source — confirm it is still needed.
    #[error("Unknown error")]
    Unknown,
}
24
/// Shorthand for `std::result::Result` with [`MsgError`] as the error type.
type Result<S> = std::result::Result<S, MsgError>;
26
/// A low-level API for reading data from a .msg file.
///
/// The reader borrows an already-opened compound file and resolves every
/// stream and storage name relative to `path`.
pub struct MsgReader<'c, 'p, F> {
    /// The compound file that all streams and storages are read from.
    inner: &'c mut CompoundFile<F>,
    /// Storage path inside the compound file that lookups are joined onto.
    path: &'p Path,
}
32
/// A single attachment: its name together with its raw content.
#[derive(Clone)]
pub struct Attachment {
    /// Name of the attachment.
    pub name: String,
    /// Raw bytes of the attachment.
    pub data: Vec<u8>,
}

// Hand-written `Debug`: the payload bytes are not printed, only their count,
// which is shown under the label `data of size`.
impl Debug for Attachment {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let payload_len = self.data.len();
        let mut builder = f.debug_struct("Attachment");
        builder.field("name", &self.name);
        builder.field("data of size", &payload_len);
        builder.finish()
    }
}
47
/// A high-level API for interacting with .msg files, providing an owned data structure.
///
/// Every field is filled on a best-effort basis: a value that cannot be read
/// from the file is left as `None` or as an empty vector.
#[derive(Debug, Clone)]
pub struct Email {
    /// Sender as a `(name, address)` pair.
    pub from: Option<(String, String)>,
    /// Sent date taken from the transport message headers, converted to UTC.
    pub sent_date: Option<chrono::DateTime<Utc>>,
    /// "To" recipients as `(name, address)` pairs.
    pub to: Vec<(String, String)>,
    /// "Cc" recipients as `(name, address)` pairs.
    pub cc: Vec<(String, String)>,
    /// "Bcc" recipients as `(name, address)` pairs.
    pub bcc: Vec<(String, String)>,
    /// Subject line.
    pub subject: Option<String>,
    /// Message body: the HTML body when it can be read, otherwise the decompressed RTF body.
    pub body: Option<String>,
    /// Attachments whose name and binary data could both be read.
    pub attachments: Vec<Attachment>,
    /// Messages embedded as attachments, parsed recursively.
    pub embedded_messages: Vec<Email>,
}
61
62impl Email {
63    pub fn from_path<P: AsRef<Path>>(file: P) -> Self {
64        Self::from_path_internal(file.as_ref(), Path::new("/"))
65    }
66    pub fn from_bytes<B: AsRef<[u8]>>(bytes: B) -> Self {
67        Self::from_bytes_internal(bytes.as_ref(), Path::new("/"))
68    }
69
70    fn from_path_internal(file: &Path, subpath: &Path) -> Self {
71        let mut comp = cfb::open(file).unwrap();
72        let mut reader = MsgReader::new(&mut comp, subpath);
73        let from = reader.from().ok();
74        let sent_date = reader.sent_date().ok();
75        let to = reader.to().unwrap_or_default();
76        let cc = reader.cc().unwrap_or_default();
77        let bcc = reader.bcc().unwrap_or_default();
78        let subject = reader.pr_subject().ok();
79        let body = reader.body().ok();
80        let attachments = reader.attachments().unwrap_or_default();
81        let emb_paths = reader.embedded_messages().unwrap();
82        let embedded_messages: Vec<_> = emb_paths
83            .into_iter()
84            .map(|emb_path| Self::from_path_internal(file, &emb_path))
85            .collect();
86        Self {
87            from,
88            sent_date,
89            to,
90            cc,
91            bcc,
92            subject,
93            body,
94            attachments,
95            embedded_messages,
96        }
97    }
98    fn from_bytes_internal(bytes: &[u8], subpath: &Path) -> Self {
99        let cur = Cursor::new(bytes);
100        let mut comp = CompoundFile::open(cur).unwrap();
101        let mut reader = MsgReader::new(&mut comp, subpath);
102        let from = reader.from().ok();
103        let sent_date = reader.sent_date().ok();
104        let to = reader.to().unwrap_or_default();
105        let cc = reader.cc().unwrap_or_default();
106        let bcc = reader.bcc().unwrap_or_default();
107        let subject = reader.pr_subject().ok();
108        let body = reader.body().ok();
109        let attachments = reader.attachments().unwrap_or_default();
110        let emb_paths = reader.embedded_messages().unwrap();
111        let embedded_messages: Vec<_> = emb_paths
112            .into_iter()
113            .map(|emb_path| Self::from_bytes_internal(bytes, &emb_path))
114            .collect();
115        Self {
116            from,
117            sent_date,
118            to,
119            cc,
120            bcc,
121            subject,
122            body,
123            attachments,
124            embedded_messages,
125        }
126    }
127}
128
129impl<'c, 'p, F> MsgReader<'c, 'p, F>
130where
131    F: Read + Seek,
132{
133    pub fn new(inner: &'c mut CompoundFile<F>, path: &'p Path) -> Self {
134        Self { inner, path }
135    }
136
137    fn read_simple_string(&mut self, prop: &str) -> Result<String> {
138        let mut content = self
139            .inner
140            .open_stream(self.path.join(format!("__substg1.0_{prop}001F")))?;
141        let mut buf = vec![];
142        content.read_to_end(&mut buf).unwrap();
143        String::from_utf16(&pack_u8s_to_u16s_le_padded(&buf))
144            .map_err(|_e| MsgError::Encoding)
145            .map(|x| x.trim_end_matches('\0').to_string())
146    }
147    fn read_simple_binary(&mut self, prop: &str) -> Result<Vec<u8>> {
148        let mut content = self
149            .inner
150            .open_stream(self.path.join(format!("__substg1.0_{prop}0102")))?;
151        let mut buf = vec![];
152        content.read_to_end(&mut buf).unwrap();
153        Ok(buf)
154    }
155    pub fn read_path_as_binary(&mut self, subpath: &Path) -> Result<Vec<u8>> {
156        let mut content = self.inner.open_stream(self.path.join(subpath))?;
157        let mut buf = vec![];
158        content.read_to_end(&mut buf).unwrap();
159        Ok(buf)
160    }
161    pub fn read_path_as_string(&mut self, subpath: &Path) -> Result<String> {
162        let mut content = self.inner.open_stream(self.path.join(subpath))?;
163        let mut buf = vec![];
164        content.read_to_end(&mut buf).unwrap();
165        String::from_utf16(&pack_u8s_to_u16s_le_padded(&buf))
166            .map_err(|_e| MsgError::Encoding)
167            .map(|x| x.trim_end_matches('\0').to_string())
168    }
169    pub fn pr_subject(&mut self) -> Result<String> {
170        self.read_simple_string("0037") // PR_SUBJECT
171    }
172    pub fn pr_sender_name(&mut self) -> Result<String> {
173        self.read_simple_string("0C1A")
174    }
175    pub fn pr_sender_email_adress_str(&mut self) -> Result<String> {
176        self.read_simple_string("0C19")
177    }
178    pub fn pr_smtp_sender_address(&mut self) -> Result<String> {
179        self.read_simple_string("5D01")
180    }
181    pub fn pr_smtp_address(&mut self) -> Result<String> {
182        self.read_simple_string("39FE")
183    }
184    pub fn sender_address(&mut self) -> Result<String> {
185        self.pr_sender_email_adress_str()
186            .or_else(|_| self.pr_smtp_address())
187            .or_else(|_| self.pr_smtp_sender_address())
188    }
189    pub fn from(&mut self) -> Result<(String, String)> {
190        Ok((self.pr_sender_name()?, self.sender_address()?))
191    }
192    pub fn pr_transport_message_headers(&mut self) -> Result<String> {
193        self.read_simple_string("007D")
194    }
195    pub fn pr_body_html(&mut self) -> Result<String> {
196        let bin = self.read_simple_binary("1013")?;
197        String::from_utf8(bin).map_err(|_| MsgError::Encoding)
198    }
199    pub fn pr_rtf_compressed(&mut self) -> Result<Vec<u8>> {
200        self.read_simple_binary("1009")
201    }
202    fn rtf(&mut self) -> Result<String> {
203        self.pr_rtf_compressed()
204            .and_then(|comp_rtf| decompress_rtf(&comp_rtf).map_err(|_| MsgError::Encoding))
205    }
206    pub fn body(&mut self) -> Result<String> {
207        self.pr_body_html().or_else(|_| self.rtf())
208    }
209    pub fn sent_date(&mut self) -> Result<DateTime<Utc>> {
210        let headers = self.pr_transport_message_headers()?;
211        let dateline = headers
212            .lines()
213            .find(|x| x.starts_with("Date"))
214            .ok_or(MsgError::Encoding)?
215            .split_once(": ")
216            .ok_or(MsgError::Encoding)?
217            .1;
218        chrono::DateTime::parse_from_rfc2822(dateline)
219            .map_err(|_| MsgError::Encoding)
220            .map(|d| d.with_timezone(&Utc))
221    }
222    fn recipients(&mut self) -> Result<Vec<(String, String)>> {
223        let recip_paths: Vec<_> = self
224            .inner
225            .read_storage(self.path)?
226            .filter(|x| x.name().starts_with("__recip_version1.0_"))
227            .map(|r| r.path().to_owned())
228            .collect();
229        recip_paths
230            .iter()
231            .map(|r| {
232                let name = self.read_path_as_string(&r.join("__substg1.0_3001001F"))?;
233                let address = self.read_path_as_string(&r.join("__substg1.0_39FE001F"))?;
234                Ok((name, address))
235            })
236            .collect()
237    }
238    pub fn to(&mut self) -> Result<Vec<(String, String)>> {
239        let to_field = self.read_simple_string("0E04")?;
240        let to_list: Vec<_> = to_field.split(";").map(|n| n.trim()).collect();
241        let output: Vec<(String, String)> = self
242            .recipients()?
243            .into_iter()
244            .filter(|(k, _v)| to_list.contains(&&k[..]))
245            .collect();
246        Ok(output)
247    }
248    pub fn cc(&mut self) -> Result<Vec<(String, String)>> {
249        let cc_field = self.read_simple_string("0E03")?;
250        let cc_list: Vec<_> = cc_field.split(";").map(|n| n.trim()).collect();
251        let output: Vec<(String, String)> = self
252            .recipients()?
253            .into_iter()
254            .filter(|(k, _v)| cc_list.contains(&&k[..]))
255            .collect();
256        Ok(output)
257    }
258    pub fn bcc(&mut self) -> Result<Vec<(String, String)>> {
259        let bcc_field = self.read_simple_string("0E02")?;
260        let bcc_list: Vec<_> = bcc_field.split(";").map(|n| n.trim()).collect();
261        let output: Vec<(String, String)> = self
262            .recipients()?
263            .into_iter()
264            .filter(|(k, _v)| bcc_list.contains(&&k[..]))
265            .collect();
266        Ok(output)
267    }
268    pub fn attachments(&mut self) -> Result<Vec<Attachment>> {
269        let attachment_paths: Vec<_> = self
270            .inner
271            .read_storage(self.path)?
272            .filter(|x| x.name().starts_with("__attach_version1.0_"))
273            .map(|r| r.path().to_owned())
274            .collect();
275        let res = attachment_paths
276            .iter()
277            .flat_map(|a| {
278                let name = self
279                    .read_path_as_string(&a.join("__substg1.0_3704001F"))
280                    .or_else(|_| self.read_path_as_string(&a.join("__substg1.0_3001001F")))?;
281                let data = self.read_path_as_binary(&a.join("__substg1.0_37010102"))?;
282                let output: Result<Attachment> = Ok(Attachment { name, data });
283                output
284            })
285            .collect();
286        Ok(res)
287    }
288    pub fn embedded_messages(&mut self) -> Result<Vec<PathBuf>> {
289        let attachment_paths: Vec<_> = self
290            .inner
291            .read_storage(self.path)?
292            .filter(|x| x.name().starts_with("__attach_version1.0_"))
293            .map(|r| r.path().to_owned())
294            .collect();
295        let res = attachment_paths
296            .into_iter()
297            .map(|a| a.join("__substg1.0_3701000D"))
298            .filter(|a| self.inner.is_storage(a))
299            .collect();
300        Ok(res)
301    }
302}
303
/// Combines consecutive byte pairs into little-endian `u16` values.
///
/// When `bytes` has odd length, the final byte is paired with `0x00` as its
/// high byte. An empty input yields an empty vector.
fn pack_u8s_to_u16s_le_padded(bytes: &[u8]) -> Vec<u16> {
    bytes
        .chunks(2)
        .map(|pair| {
            let low = pair[0];
            // A trailing single-byte chunk has no high byte; pad with zero.
            let high = pair.get(1).copied().unwrap_or(0x00);
            u16::from_le_bytes([low, high])
        })
        .collect()
}