mbox_reader/
lib.rs

1use std::fmt::{self, Debug, Formatter};
2use std::path::Path;
3use std::{fs, io, str};
4
5pub struct MboxReader<'a> {
6    data: &'a MboxFile,
7    idx: usize,
8    prev: usize,
9    testing: usize,
10}
11
12impl<'a> MboxReader<'a> {
13    fn new(map: &MboxFile) -> MboxReader {
14        MboxReader {
15            data: map,
16            idx: 0,
17            prev: 0,
18            testing: 5,
19        }
20    }
21}
22
23impl<'a> Iterator for MboxReader<'a> {
24    type Item = Entry<'a>;
25    fn next(&mut self) -> Option<Self::Item> {
26        let bytes = self.data.as_slice();
27        if self.idx >= self.data.len() {
28            return None;
29        }
30        for b in &bytes[self.idx..] {
31            if *b == b'\n' {
32                self.testing = 5;
33                self.idx += 1;
34                continue;
35            } else if self.testing == 5 && *b == b'F' {
36                self.testing = 4;
37            } else if self.testing == 4 && *b == b'r' {
38                self.testing = 3;
39            } else if self.testing == 3 && *b == b'o' {
40                self.testing = 2;
41            } else if self.testing == 2 && *b == b'm' {
42                self.testing = 1;
43            } else if self.testing == 1 && *b == b' ' {
44                self.testing = 0;
45                let start = self.idx - 4;
46                if start != 0 {
47                    let entry = Entry {
48                        idx: start,
49                        bytes: &bytes[self.prev..start],
50                    };
51                    self.prev = start;
52                    return Some(entry);
53                }
54            } else {
55                self.testing = 0;
56            }
57            self.idx += 1;
58        }
59        None
60    }
61}
62
63/// The mbox file to read. This uses the OS facility to memory-map the file in
64/// order to read it efficiently.
65pub struct MboxFile {
66    map: memmap::Mmap,
67}
68
69impl MboxFile {
70    pub fn from_file(name: &Path) -> io::Result<MboxFile> {
71        Ok(MboxFile {
72            map: unsafe { memmap::Mmap::map(&fs::File::open(name)?)? },
73        })
74    }
75    fn len(&self) -> usize {
76        self.map.len()
77    }
78    fn as_slice(&self) -> &[u8] {
79        &self.map
80    }
81    pub fn iter<'a>(&'a self) -> MboxReader<'a> {
82        MboxReader::new(self)
83    }
84}
85
/// One entry of an mbox file, as produced by [`MboxReader`].
pub struct Entry<'a> {
    /// Byte offset within the file recorded by the reader for this entry;
    /// returned by [`Entry::offset`].
    idx: usize,
    /// Raw bytes of the entry: its first line followed by the message.
    bytes: &'a [u8],
}
90
91impl<'a> Entry<'a> {
92    pub fn offset(&self) -> usize {
93        self.idx
94    }
95    pub fn start(&self) -> Start {
96        match self.bytes.iter().position(|b| *b == b'\n') {
97            Some(pos) => Start::new(&self.bytes[..pos - 1]),
98            None => Start::new(&self.bytes),
99        }
100    }
101    pub fn message(&self) -> Option<&[u8]> {
102        self.bytes
103            .iter()
104            .position(|b| *b == b'\n')
105            .and_then(|idx| Some(&self.bytes[idx + 1..]))
106    }
107}
108
109impl<'a> Debug for Entry<'a> {
110    fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
111        f.write_fmt(format_args!(
112            "Entry {{ {} bytes @ {} }}",
113            self.bytes.len(),
114            self.idx
115        ))
116    }
117}
118
/// The first line of an [`Entry`], split into its fields.
pub struct Start<'a> {
    /// The complete line, without its line terminator.
    bytes: &'a [u8],
    /// Second space-separated field of the line.
    address: &'a str,
    /// Everything after the second space of the line.
    date: &'a str,
}
124
125impl<'a> Start<'a> {
126    fn new(bytes: &'a [u8]) -> Start {
127        let mut parts = bytes.splitn(3, |b| *b == b' ');
128        let _ = parts.next();
129        let address = str::from_utf8(parts.next().unwrap()).unwrap();
130        let date = str::from_utf8(parts.next().unwrap()).unwrap();
131        Start {
132            bytes,
133            address,
134            date,
135        }
136    }
137    pub fn address(&self) -> &str {
138        self.address
139    }
140    pub fn date(&self) -> &str {
141        self.date
142    }
143    pub fn as_str(&self) -> &str {
144        str::from_utf8(self.bytes).unwrap()
145    }
146}