Skip to main content

bed_utils/bed/
io.rs

1use crate::bed::BEDLike;
2
3use std::io::{self, BufRead, BufReader, Error, ErrorKind, Read, Write};
4use std::marker::PhantomData;
5use std::str::FromStr;
6
7use super::ParseError;
8
9/// An iterator over records of a FASTQ reader.
10///
11/// This is created by calling [`Reader::records`].
12pub struct Records<'a, B, R> {
13    inner: &'a mut Reader<R>,
14    buf: String,
15    phantom: PhantomData<B>,
16}
17
18impl<'a, B, R> Records<'a, B, R>
19where
20    R: Read,
21    B: FromStr,
22{
23    pub fn new(inner: &'a mut Reader<R>) -> Self {
24        Self {
25            inner,
26            buf: String::new(),
27            phantom: PhantomData,
28        }
29    }
30}
31
32impl<'a, B, R> Iterator for Records<'a, B, R>
33where
34    R: Read,
35    B: FromStr<Err = ParseError>,
36{
37    type Item = io::Result<B>;
38
39    fn next(&mut self) -> Option<Self::Item> {
40        self.buf.clear();
41        match self.inner.read_record(&mut self.buf) {
42            Ok(LineSize::Size(0)) => None,
43            Ok(LineSize::Skip) => self.next(),
44            Ok(_) => Some(
45                self.buf
46                    .parse()
47                    .map_err(|e| Error::new(ErrorKind::Other, format!("{:?}: {}", e, &self.buf))),
48            ),
49            Err(e) => Some(Err(e)),
50        }
51    }
52}
53
54/// An iterator over records of a FASTQ reader.
55///
56/// This is created by calling [`Reader::records`].
57pub struct IntoRecords<B, R> {
58    inner: Reader<R>,
59    buf: String,
60    phantom: PhantomData<B>,
61}
62
63impl<B, R> IntoRecords<B, R>
64where
65    R: Read,
66    B: FromStr,
67{
68    pub fn new(inner: Reader<R>) -> Self {
69        Self {
70            inner,
71            buf: String::new(),
72            phantom: PhantomData,
73        }
74    }
75}
76
77impl<B, R> Iterator for IntoRecords<B, R>
78where
79    R: Read,
80    B: FromStr<Err = ParseError>,
81{
82    type Item = io::Result<B>;
83
84    fn next(&mut self) -> Option<Self::Item> {
85        self.buf.clear();
86        match self.inner.read_record(&mut self.buf) {
87            Ok(LineSize::Size(0)) => None,
88            Ok(LineSize::Skip) => self.next(),
89            Ok(_) => Some(
90                self.buf
91                    .parse()
92                    .map_err(|e| Error::new(ErrorKind::Other, format!("{:?}: {}", e, &self.buf))),
93            ),
94            Err(e) => Some(Err(e)),
95        }
96    }
97}
98
99/// A BED reader.
100pub struct Reader<R> {
101    inner: BufReader<R>,
102    skip_start_with: Option<String>,
103}
104
105impl<R> Reader<R>
106where
107    R: Read,
108{
109    /// Creates a BED reader.
110    pub fn new(inner: R, skip_start_with: Option<String>) -> Self {
111        Self {
112            inner: BufReader::new(inner),
113            skip_start_with,
114        }
115    }
116
117    /// Reads a single raw BED record.
118    pub fn read_record(&mut self, buf: &mut String) -> io::Result<LineSize> {
119        let size = read_line(&mut self.inner, buf)?;
120        if size > 0
121            && self
122                .skip_start_with
123                .as_ref()
124                .map_or(false, |x| buf.starts_with(x))
125        {
126            Ok(LineSize::Skip)
127        } else {
128            Ok(LineSize::Size(size))
129        }
130    }
131
132    /// Returns an iterator over records starting from the current stream position.
133    ///
134    /// The stream is expected to be at the start of a record.
135    ///
136    pub fn records<B: FromStr + BEDLike>(&mut self) -> Records<'_, B, R> {
137        Records::new(self)
138    }
139
140    pub fn into_records<B: FromStr + BEDLike>(self) -> IntoRecords<B, R> {
141        IntoRecords::new(self)
142    }
143}
144
/// Outcome of reading one line with [`Reader::read_record`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineSize {
    /// A line was read and not skipped. The payload is the number of bytes read from
    /// the stream for that line; `0` means the end of input was reached.
    Size(usize),
    /// A line was read but it starts with the reader's skip prefix.
    Skip,
}
149
/// Appends the next line from `reader` to `buf`, dropping a trailing `\n` or `\r\n`.
///
/// Returns the number of bytes read from `reader`, counted before the terminator is
/// removed; `0` signals end of input. A final line with no `\n` is left as read.
fn read_line<R>(reader: &mut R, buf: &mut String) -> io::Result<usize>
where
    R: BufRead,
{
    let bytes_read = reader.read_line(buf)?;
    if bytes_read == 0 {
        return Ok(0);
    }

    // A carriage return is only removed when it directly precedes the line feed.
    if let Some(without_lf) = buf.strip_suffix('\n') {
        let kept = without_lf.strip_suffix('\r').unwrap_or(without_lf).len();
        buf.truncate(kept);
    }

    Ok(bytes_read)
}
172
173/// A BED writer.
174pub struct Writer<W> {
175    inner: W,
176}
177
178impl<W> Writer<W>
179where
180    W: Write,
181{
182    /// Creates a BED writer.
183    pub fn new(inner: W) -> Self {
184        Self { inner }
185    }
186
187    /// Writes a BED record.
188    pub fn write_record<B>(&mut self, record: &B) -> io::Result<()>
189    where
190        B: std::fmt::Display + BEDLike,
191    {
192        writeln!(&mut self.inner, "{}", record)
193    }
194}
195
#[cfg(test)]
mod tests {
    use super::super::BED;
    use super::*;
    use crate::bed::*;

    /// `read_line` removes `\n` and `\r\n` terminators and keeps unterminated lines.
    #[test]
    fn test_read_line() {
        fn t(buf: &mut String, mut reader: &[u8], expected: &str) {
            buf.clear();
            read_line(&mut reader, buf).unwrap();
            assert_eq!(buf, expected);
        }

        let mut buf = String::new();

        t(&mut buf, b"noodles\n", "noodles");
        t(&mut buf, b"noodles\r\n", "noodles");
        t(&mut buf, b"noodles", "noodles");
    }

    /// Every line of a well-formed six-column input parses into a record.
    #[test]
    fn test_read_record() {
        let data = b"chr1\t200\t1000\tr1\t100\t+\n\
chr2\t220\t2000\tr2\t2\t-\n\
chr10\t2000\t10000\tr3\t3\t+\n" as &[u8];
        let mut reader = Reader::new(data, None);
        let records: Vec<BED<6>> = reader
            .records::<BED<6>>()
            .map(|record| record.unwrap())
            .collect();
        assert_eq!(records.len(), 3);
    }

    /// `read_record` reports skipped lines, byte counts and end of input.
    #[test]
    fn test_read_record_skip() {
        let data = b"#header\nchr1\t1\t2\n" as &[u8];
        let mut reader = Reader::new(data, Some("#".to_string()));
        let mut buf = String::new();

        assert!(matches!(
            reader.read_record(&mut buf).unwrap(),
            LineSize::Skip
        ));

        buf.clear();
        assert!(matches!(
            reader.read_record(&mut buf).unwrap(),
            LineSize::Size(9)
        ));
        assert_eq!(buf, "chr1\t1\t2");

        buf.clear();
        assert!(matches!(
            reader.read_record(&mut buf).unwrap(),
            LineSize::Size(0)
        ));
    }

    /// Lines starting with the skip prefix never reach the record iterator.
    #[test]
    fn test_records_skip_prefix() {
        let data = b"#header\nchr1\t200\t1000\tr1\t100\t+\n#trailer\n" as &[u8];
        let mut reader = Reader::new(data, Some("#".to_string()));
        let records: Vec<BED<6>> = reader
            .records::<BED<6>>()
            .map(|record| record.unwrap())
            .collect();
        assert_eq!(records.len(), 1);
    }

    #[test]
    fn test_write_record() -> Result<(), Box<dyn std::error::Error>> {
        let mut writer = Writer::new(Vec::new());
        let record: BED<3> = "sq0\t8\t13".parse().unwrap();
        writer.write_record(&record)?;
        assert_eq!(writer.inner, b"sq0\t8\t13\n");
        Ok(())
    }

    #[test]
    fn test_io_narrowpeak() -> Result<(), Box<dyn std::error::Error>> {
        let input = "chr1\t9356548\t9356648\t.\t0\t.\t182\t5.0945\t-1\t50";
        let mut writer = Writer::new(Vec::new());
        let record: NarrowPeak = input.parse().unwrap();
        writer.write_record(&record)?;
        assert_eq!(
            std::str::from_utf8(writer.inner.as_slice()).unwrap(),
            input.to_string() + "\n",
        );
        Ok(())
    }

    #[test]
    fn test_io_bedgraph() -> Result<(), Box<dyn std::error::Error>> {
        let input = "chr1\t9356548\t9356648\t50.1";
        let mut writer = Writer::new(Vec::new());
        let record: BedGraph<f32> = input.parse().unwrap();
        writer.write_record(&record)?;
        assert_eq!(
            std::str::from_utf8(writer.inner.as_slice()).unwrap(),
            input.to_string() + "\n",
        );
        Ok(())
    }
}