gchemol_parser/
reader.rs

1// [[file:../parser.note::8e7e8fdf][8e7e8fdf]]
2use gut::fs::*;
3use gut::prelude::*;
4
5use std::io::Cursor;
6// 8e7e8fdf ends here
7
8// [[file:../parser.note::3f27d680][3f27d680]]
9type FileReader = BufReader<File>;
10
11fn text_file_reader<P: AsRef<Path>>(p: P) -> Result<FileReader> {
12    let p = p.as_ref();
13    debug!("Reader for file: {}", p.display());
14    let f = File::open(p).with_context(|| format!("Failed to open file {:?}", p))?;
15
16    let reader = BufReader::new(f);
17    Ok(reader)
18}
19
/// A streaming reader for text sources, aimed at files too large to be
/// loaded into memory in one piece.
#[derive(Debug)]
pub struct TextReader<R> {
    /// The wrapped reader that all text is pulled from.
    inner: R,
}
25
26impl TextReader<FileReader> {
27    /// Build a text reader for file from path `p`.
28    pub fn try_from_path(p: &Path) -> Result<Self> {
29        let reader = text_file_reader(p)?;
30        let parser = Self { inner: reader };
31        Ok(parser)
32    }
33}
34
35impl<'a> TextReader<Cursor<&'a str>> {
36    /// Build a text reader for string slice.
37    pub fn from_str(s: &'a str) -> Self {
38        let r = Cursor::new(s);
39        TextReader { inner: r }
40    }
41}
42
43impl<R: Read> TextReader<BufReader<R>> {
44    /// Build a text reader from a struct implementing Read trait.
45    pub fn new(r: R) -> Self {
46        Self { inner: BufReader::new(r) }
47    }
48}
49
50impl<R: BufRead> TextReader<R> {
51    /// Read a new line into buf.
52    ///
53    /// # NOTE
54    /// - This function will return the total number of bytes read.
55    /// - If this function returns Ok(0), the stream has reached EOF.
56    pub fn read_line(&mut self, buf: &mut String) -> Result<usize> {
57        self.inner.read_line(buf).map_err(|e| anyhow!("Read line failure"))
58    }
59
60    /// Returns an iterator over the lines of this reader. Each string returned
61    /// will not have a line ending.
62    pub fn lines(self) -> impl Iterator<Item = String> {
63        // silently ignore UTF-8 error
64        self.inner.lines().filter_map(|s| if let Ok(line) = s { Some(line) } else { None })
65    }
66
67    /// Read all text into string `buf` (Note: out of memory issue for large
68    /// file)
69    pub fn read_to_string(&mut self, buf: &mut String) -> Result<usize> {
70        let n = self.inner.read_to_string(buf)?;
71        Ok(n)
72    }
73}
74// 3f27d680 ends here
75
76// [[file:../parser.note::95fe0e8a][95fe0e8a]]
77use std::io::SeekFrom;
78
79impl<R: BufRead + Seek> TextReader<R> {
80    /// Peek next line without moving cursor.
81    pub fn peek_line(&mut self) -> Result<String> {
82        let mut buf = String::new();
83        if let Ok(n) = self.inner.read_line(&mut buf) {
84            self.goto_relative(-1 * n as i64).expect("peek line go back");
85        }
86        Ok(buf)
87    }
88
89    /// Skip reading until finding a matched line. Return the number
90    /// of bytes read in before the matched line. Return error if not
91    /// found.
92    pub fn seek_line<F>(&mut self, mut f: F) -> Result<usize>
93    where
94        F: FnMut(&str) -> bool,
95    {
96        let mut line = String::new();
97        let mut m = 0;
98        loop {
99            let n = self.inner.read_line(&mut line)?;
100            if n == 0 {
101                // EOF
102                bail!("no matched line found!");
103            } else {
104                // back to line start position
105                if f(&line) {
106                    let _ = self.goto_relative(-1 * n as i64)?;
107                    return Ok(m);
108                }
109            }
110            m += n;
111            line.clear();
112        }
113
114        Ok(m)
115    }
116
117    /// Read lines into `buf` until `f` closure predicates true. Return
118    /// total bytes read into `buf`.
119    ///
120    /// # NOTE
121    /// - the line matching predicate is not included into `buf`
122    pub fn read_until<F>(&mut self, buf: &mut String, mut f: F) -> Result<usize>
123    where
124        F: FnMut(&str) -> bool,
125    {
126        let mut m = buf.len();
127        loop {
128            let n = self.inner.read_line(buf)?;
129            if n == 0 {
130                // EOF
131                bail!("no matched line found!");
132            }
133            let line = &buf[m..];
134            if f(line) {
135                self.goto_relative(-1 * n as i64)?;
136                buf.drain(m..);
137                return Ok(m);
138            }
139            m += n;
140        }
141
142        Ok(m)
143    }
144
145    /// Goto the start of inner file.
146    pub fn goto_start(&mut self) {
147        self.inner.rewind();
148    }
149
150    /// Goto the end of inner file.
151    pub fn goto_end(&mut self) {
152        self.inner.seek(SeekFrom::End(0));
153    }
154
155    /// Returns the current seek position from the start of the stream.
156    pub fn get_current_position(&mut self) -> Result<u64> {
157        let pos = self.inner.stream_position()?;
158        Ok(pos)
159    }
160
161    /// Goto to an absolute position, in bytes, in a text stream.
162    pub fn goto(&mut self, pos: u64) -> Result<()> {
163        let pos = self.inner.seek(SeekFrom::Start(pos))?;
164        Ok(())
165    }
166
167    /// Sets the offset to the current position plus the specified
168    /// number of bytes. If the seek operation completed successfully,
169    /// this method returns the new position from the start of the
170    /// stream.
171    pub fn goto_relative(&mut self, offset: i64) -> Result<u64> {
172        let pos = self.inner.seek(SeekFrom::Current(offset))?;
173        Ok(pos)
174    }
175}
176// 95fe0e8a ends here
177
178// [[file:../parser.note::b7e82299][b7e82299]]
/// Exercises line iteration, line seeking, string-backed readers and
/// `read_until` / `peek_line` / `read_line` on `TextReader`.
#[test]
fn test_reader() -> Result<()> {
    // test lines
    let f = "./tests/files/multi.xyz";
    let reader = TextReader::try_from_path(f.as_ref())?;
    let line = reader.lines().nth(1).unwrap();
    assert_eq!(line, " Configuration number :        7");

    // test seeking
    let f = "./tests/files/ch3f.mol2";
    let mut reader = TextReader::try_from_path(f.as_ref())?;
    let _ = reader.seek_line(|line| line.starts_with("@<TRIPOS>"))?;
    let line = reader.lines().next().expect("ch3f test");
    assert_eq!(line, "@<TRIPOS>MOLECULE");

    // test from_str
    let s = "abc\nabcd\r\nabcde\n";
    let reader = TextReader::from_str(s);
    let line = reader.lines().next().unwrap();
    assert_eq!(line, "abc");

    // test read line until
    let s = "abc\nhere\r\nabcde\nhere\n\r";
    let mut reader = TextReader::from_str(s);
    let mut buf = String::new();
    let n = reader.read_until(&mut buf, |line| line.starts_with("here"))?;
    assert_eq!(buf, "abc\n");
    assert_eq!(n, 4);

    // peeking must not consume the matched line
    assert_eq!(reader.peek_line()?, "here\r\n");

    buf.clear();
    // propagate a read failure instead of silently dropping it
    reader.read_line(&mut buf)?;
    assert_eq!(buf, "here\r\n");

    Ok(())
}
212// b7e82299 ends here