// chainfile/reader.rs

1//! A chain file reader.
2
3use std::io::BufRead;
4use std::io::{self};
5use std::iter;
6
7use thiserror::Error;
8
9use crate::Line;
10use crate::alignment::section::Sections;
11use crate::line;
12
/// The line feed character (`\n`) that terminates a line.
const NEW_LINE: char = '\n';

/// The carriage return character (`\r`), which precedes the line feed in
/// CRLF-terminated lines.
const CARRIAGE_RETURN: char = '\r';
18
19/// An error related to a [`Reader`].
20#[derive(Debug, Error)]
21pub enum Error {
22    /// An I/O error.
23    #[error("i/o error: {0}")]
24    Io(io::Error),
25
26    /// A line error.
27    #[error("line error: {0}")]
28    Line(line::Error),
29}
30
/// A chain file reader.
///
/// This is a newtype around any [`BufRead`] source; the wrapped reader is the
/// only state it holds.
#[derive(Clone, Debug)]
pub struct Reader<T>(T)
where
    T: BufRead;
36
37impl<T> Reader<T>
38where
39    T: BufRead,
40{
41    /// Creates a chain file reader.
42    ///
43    /// # Examples
44    ///
45    /// ```
46    /// let data = b"chain 0 seq0 4 + 0 4 seq0 5 - 0 5 1\n3\t0\t1\n1";
47    /// let reader = chainfile::Reader::new(&data[..]);
48    /// ```
49    pub fn new(inner: T) -> Self {
50        Self::from(inner)
51    }
52
53    /// Gets a reference to the inner reader.
54    ///
55    /// # Examples
56    ///
57    /// ```
58    /// use std::io;
59    ///
60    /// let data = b"chain 0 seq0 4 + 0 4 seq0 5 - 0 5 1\n3\t0\t1\n1";
61    /// let cursor = io::Cursor::new(data);
62    ///
63    /// let reader = chainfile::Reader::new(cursor);
64    /// assert_eq!(reader.inner().position(), 0);
65    /// ```
66    pub fn inner(&self) -> &T {
67        &self.0
68    }
69
70    /// Gets a mutable reference to the inner reader.
71    ///
72    /// # Examples
73    ///
74    /// ```
75    /// use std::io::Read;
76    ///
77    /// let data = b"chain 0 seq0 4 + 0 4 seq0 5 - 0 5 1\n3\t0\t1\n1";
78    /// let mut reader = chainfile::Reader::new(&data[..]);
79    /// let mut buffer = vec![0; data.len()];
80    ///
81    /// reader.inner_mut().read_exact(&mut buffer).unwrap();
82    /// assert_eq!(buffer, data[..]);
83    /// ```
84    pub fn inner_mut(&mut self) -> &mut T {
85        &mut self.0
86    }
87
88    /// Consumes self and returns the inner reader.
89    ///
90    /// # Examples
91    ///
92    /// ```
93    /// use std::io::BufRead;
94    ///
95    /// let data = b"chain 0 seq0 4 + 0 4 seq0 5 - 0 5 1\n3\t0\t1\n1";
96    /// let reader = chainfile::Reader::new(&data[..]);
97    /// let mut lines = reader.into_inner().lines().map(|line| line.unwrap());
98    ///
99    /// assert_eq!(
100    ///     lines.next(),
101    ///     Some(String::from("chain 0 seq0 4 + 0 4 seq0 5 - 0 5 1"))
102    /// );
103    /// assert_eq!(lines.next(), Some(String::from("3\t0\t1")));
104    /// assert_eq!(lines.next(), Some(String::from("1")));
105    /// assert_eq!(lines.next(), None);
106    /// ```
107    pub fn into_inner(self) -> T {
108        self.0
109    }
110
111    /// Reads a raw, textual line from the underlying reader.
112    ///
113    /// # Examples
114    ///
115    /// ```
116    /// use std::io;
117    ///
118    /// let data = b"chain 0 seq0 4 + 0 4 seq0 5 - 0 5 1\n3\t0\t1\n1";
119    /// let mut reader = chainfile::Reader::new(&data[..]);
120    ///
121    /// let mut buffer = String::new();
122    ///
123    /// assert_eq!(reader.read_line_raw(&mut buffer)?, 36);
124    /// assert_eq!(buffer, "chain 0 seq0 4 + 0 4 seq0 5 - 0 5 1");
125    ///
126    /// assert_eq!(reader.read_line_raw(&mut buffer)?, 6);
127    /// assert_eq!(buffer, "3\t0\t1");
128    ///
129    /// assert_eq!(reader.read_line_raw(&mut buffer)?, 1);
130    /// assert_eq!(buffer, "1");
131    ///
132    /// assert_eq!(reader.read_line_raw(&mut buffer)?, 0);
133    ///
134    /// # Ok::<(), io::Error>(())
135    /// ```
136    pub fn read_line_raw(&mut self, buffer: &mut String) -> io::Result<usize> {
137        read_line(self.inner_mut(), buffer)
138    }
139
140    /// Attempts to read a [`Line`] from the underlying reader.
141    ///
142    /// # Examples
143    ///
144    /// ```
145    /// use std::io;
146    ///
147    /// use chainfile::Line;
148    ///
149    /// let data = b"chain 0 seq0 4 + 0 4 seq0 5 - 0 5 1\n3\t0\t1\n1";
150    /// let mut reader = chainfile::Reader::new(&data[..]);
151    ///
152    /// let mut buffer = String::new();
153    /// assert!(matches!(
154    ///     reader.read_line(&mut buffer)?,
155    ///     Some(Line::Header(_))
156    /// ));
157    /// assert!(matches!(
158    ///     reader.read_line(&mut buffer)?,
159    ///     Some(Line::AlignmentData(_))
160    /// ));
161    /// assert!(matches!(
162    ///     reader.read_line(&mut buffer)?,
163    ///     Some(Line::AlignmentData(_))
164    /// ));
165    /// assert!(matches!(reader.read_line(&mut buffer)?, None));
166    ///
167    /// # Ok::<(), Box<dyn std::error::Error>>(())
168    /// ```
169    pub fn read_line(&mut self, buffer: &mut String) -> Result<Option<Line>, Error> {
170        let read = self.read_line_raw(buffer).map_err(Error::Io)?;
171
172        match read {
173            0 => Ok(None),
174            _ => {
175                let line = buffer.parse::<Line>().map_err(Error::Line)?;
176                Ok(Some(line))
177            }
178        }
179    }
180
181    /// Returns an iterator over the `Line`s in the underlying reader.
182    ///
183    /// # Examples
184    ///
185    /// ```
186    /// use std::io::BufRead;
187    ///
188    /// let data = b"chain 0 seq0 4 + 0 4 seq0 5 - 0 5 1\n3\t0\t1\n1";
189    /// let mut reader = chainfile::Reader::new(&data[..]);
190    ///
191    /// let lines = reader.lines().collect::<Vec<_>>();
192    /// assert_eq!(lines.len(), 3);
193    ///
194    /// # Ok::<(), Box<dyn std::error::Error>>(())
195    /// ```
196    pub fn lines(&mut self) -> impl Iterator<Item = io::Result<Line>> + '_ {
197        let mut buffer = String::new();
198
199        iter::from_fn(move || {
200            buffer.clear();
201
202            match self.read_line_raw(&mut buffer) {
203                Ok(0) => None,
204                Ok(_) => Some(
205                    buffer
206                        .parse()
207                        .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)),
208                ),
209                Err(e) => Some(Err(e)),
210            }
211        })
212    }
213
214    /// Returns an iterator over the alignment sections in the underlying
215    /// reader.
216    ///
217    /// # Examples
218    ///
219    /// ```
220    /// let data = b"chain 0 seq0 4 + 0 4 seq0 5 - 0 5 1\n3\t0\t1\n1";
221    /// let mut reader = chainfile::Reader::new(&data[..]);
222    ///
223    /// let sections = reader
224    ///     .sections()
225    ///     .map(|result| result.unwrap())
226    ///     .collect::<Vec<_>>();
227    /// assert_eq!(sections.len(), 1);
228    ///
229    /// # Ok::<(), Box<dyn std::error::Error>>(())
230    /// ```
231    pub fn sections(&mut self) -> Sections<'_, T> {
232        Sections::new(self)
233    }
234}
235
236impl<T> From<T> for Reader<T>
237where
238    T: BufRead,
239{
240    fn from(inner: T) -> Self {
241        Self(inner)
242    }
243}
244
245/// Reads a line from a buffered reader.
246///
247/// This method is copied almost directly from noodles-gtf. I repurposed it
248/// because it captures pretty much exactly what I need to do for this reader.
249fn read_line<T>(reader: &mut T, buffer: &mut String) -> io::Result<usize>
250where
251    T: BufRead,
252{
253    buffer.clear();
254
255    match reader.read_line(buffer) {
256        Ok(0) => Ok(0),
257        Ok(n) => {
258            if buffer.ends_with(NEW_LINE) {
259                buffer.pop();
260
261                if buffer.ends_with(CARRIAGE_RETURN) {
262                    buffer.pop();
263                }
264            }
265
266            Ok(n)
267        }
268        Err(e) => Err(e),
269    }
270}
271
#[cfg(test)]
mod tests {
    use std::io;

    use super::*;

    #[test]
    fn test_read_line() {
        let data = b"hello\r\nworld!";
        let mut cursor = io::Cursor::new(data);

        let mut buffer = String::new();

        // The CRLF terminator is stripped from the buffer but still counted.
        let len = read_line(&mut cursor, &mut buffer).unwrap();
        assert_eq!(buffer, "hello");
        assert_eq!(len, 7);

        // The final line has no terminator at all.
        let len = read_line(&mut cursor, &mut buffer).unwrap();
        assert_eq!(buffer, "world!");
        assert_eq!(len, 6);

        // End of input: nothing is read and the buffer is left empty.
        let len = read_line(&mut cursor, &mut buffer).unwrap();
        assert_eq!(buffer, "");
        assert_eq!(len, 0);
    }

    #[test]
    fn test_read_line_with_line_feed_only() {
        let data = b"a\n\nb\r";
        let mut cursor = io::Cursor::new(data);

        let mut buffer = String::new();

        let len = read_line(&mut cursor, &mut buffer).unwrap();
        assert_eq!(buffer, "a");
        assert_eq!(len, 2);

        // A blank line yields an empty buffer but a non-zero length.
        let len = read_line(&mut cursor, &mut buffer).unwrap();
        assert_eq!(buffer, "");
        assert_eq!(len, 1);

        // A carriage return that is not followed by a line feed is kept.
        let len = read_line(&mut cursor, &mut buffer).unwrap();
        assert_eq!(buffer, "b\r");
        assert_eq!(len, 2);
    }
}