bed_utils/bed/
io.rs

1use crate::bed::BEDLike;
2
3use std::io::{self, Error, ErrorKind, Read, Write, BufRead, BufReader};
4use std::str::FromStr;
5use std::marker::PhantomData;
6
7use super::ParseError;
8
9/// An iterator over records of a FASTQ reader.
10///
11/// This is created by calling [`Reader::records`].
12pub struct Records<'a, B, R> {
13    inner: &'a mut Reader<R>,
14    buf: String,
15    phantom: PhantomData<B>,
16}
17
18impl<'a, B, R> Records<'a, B, R>
19where
20    R: Read,
21    B: FromStr,
22{
23    pub fn new(inner: &'a mut Reader<R>) -> Self {
24        Self {
25            inner,
26            buf: String::new(),
27            phantom: PhantomData,
28        }
29    }
30}
31
32impl<'a, B, R> Iterator for Records<'a, B, R>
33where
34    R: Read,
35    B: FromStr<Err = ParseError>,
36{
37    type Item = io::Result<B>;
38
39    fn next(&mut self) -> Option<Self::Item> {
40        self.buf.clear();
41        match self.inner.read_record(&mut self.buf) {
42            Ok(LineSize::Size(0)) => None,
43            Ok(LineSize::Skip) => self.next(),
44            Ok(_) => Some(self.buf.parse().map_err(
45                |e| Error::new(ErrorKind::Other, format!("{:?}: {}", e, &self.buf))
46            )),
47            Err(e) => Some(Err(e)),
48        }
49    }
50}
51
52/// An iterator over records of a FASTQ reader.
53///
54/// This is created by calling [`Reader::records`].
55pub struct IntoRecords<B, R> {
56    inner: Reader<R>,
57    buf: String,
58    phantom: PhantomData<B>,
59}
60
61impl<B, R> IntoRecords<B, R>
62where
63    R: Read,
64    B: FromStr,
65{
66    pub fn new(inner: Reader<R>) -> Self {
67        Self { inner, buf: String::new(), phantom: PhantomData }
68    }
69}
70
71impl<B, R> Iterator for IntoRecords<B, R>
72where
73    R: Read,
74    B: FromStr<Err = ParseError>,
75{
76    type Item = io::Result<B>;
77
78    fn next(&mut self) -> Option<Self::Item> {
79        self.buf.clear();
80        match self.inner.read_record(&mut self.buf) {
81            Ok(LineSize::Size(0)) => None,
82            Ok(LineSize::Skip) => self.next(),
83            Ok(_) => Some(self.buf.parse().map_err(
84                |e| Error::new(ErrorKind::Other, format!("{:?}: {}", e, &self.buf))
85            )),
86            Err(e) => Some(Err(e)),
87        }
88    }
89}
90
91/// A BED reader.
92pub struct Reader<R> {
93    inner: BufReader<R>,
94    skip_start_with: Option<String>,
95}
96
97impl<R> Reader<R>
98where
99    R: Read,
100{
101    /// Creates a BED reader.
102    pub fn new(inner: R, skip_start_with: Option<String>) -> Self {
103        Self { inner: BufReader::new(inner), skip_start_with }
104    }
105
106    /// Reads a single raw BED record.
107    pub fn read_record(&mut self, buf: &mut String) -> io::Result<LineSize> {
108        let size = read_line(&mut self.inner, buf)?;
109        if size > 0 && self.skip_start_with.as_ref().map_or(false, |x| buf.starts_with(x)) {
110            Ok(LineSize::Skip)
111        } else {
112            Ok(LineSize::Size(size))
113        }
114    }
115
116    /// Returns an iterator over records starting from the current stream position.
117    ///
118    /// The stream is expected to be at the start of a record.
119    ///
120    pub fn records<B: FromStr + BEDLike>(&mut self) -> Records<'_, B, R> {
121        Records::new(self)
122    }
123
124    pub fn into_records<B: FromStr + BEDLike>(self) -> IntoRecords<B, R> {
125        IntoRecords::new(self)
126    }
127}
128
/// Outcome of reading one line from a [`Reader`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineSize {
    /// A line was read; the payload is the size reported by the underlying read.
    /// A size of `0` signals end of input.
    Size(usize),
    /// A line was read but it starts with the reader's skip prefix.
    Skip,
}
133
/// Reads one line from `reader` into `buf`, dropping the line terminator.
///
/// The line is appended to `buf`. A trailing `\n` is removed, and a `\r` directly
/// before it is removed as well; a `\r` without a following `\n` is kept.
///
/// Returns the number of bytes consumed from `reader`, which still counts the
/// removed terminator. `0` means end of input, in which case `buf` is left untouched.
fn read_line<R>(reader: &mut R, buf: &mut String) -> io::Result<usize>
where
    R: BufRead,
{
    let bytes_read = reader.read_line(buf)?;

    // Only trim when something was actually read, so that a buffer which already
    // ended in a newline is not altered at end of input.
    if bytes_read > 0 && buf.ends_with('\n') {
        buf.pop();
        if buf.ends_with('\r') {
            buf.pop();
        }
    }

    Ok(bytes_read)
}
156
157/// A BED writer.
158pub struct Writer<W> { inner: W }
159
160impl<W> Writer<W> where W: Write {
161    /// Creates a BED writer.
162    pub fn new(inner: W) -> Self { Self { inner } }
163
164    /// Writes a BED record.
165    pub fn write_record<B>(&mut self, record: &B) -> io::Result<()>
166    where
167        B: std::fmt::Display + BEDLike,
168    {
169        writeln!(&mut self.inner, "{}", record)
170    }
171}
172
#[cfg(test)]
mod tests {
    use super::*;
    use super::super::BED;
    use crate::bed::*;

    /// `read_line` strips `\n` and `\r\n`, and tolerates a missing terminator.
    #[test]
    fn test_read_line() {
        fn t(buf: &mut String, mut reader: &[u8], expected: &str) {
            buf.clear();
            read_line(&mut reader, buf).unwrap();
            assert_eq!(buf, expected);
        }

        let mut buf = String::new();

        t(&mut buf, b"noodles\n", "noodles");
        t(&mut buf, b"noodles\r\n", "noodles");
        t(&mut buf, b"noodles", "noodles");
        t(&mut buf, b"", "");
    }

    /// Every line of the input parses into a record and iteration stops at the end
    /// of the input.
    #[test]
    fn test_read_record() {
        let data = b"\
chr1\t200\t1000\tr1\t100\t+
chr2\t220\t2000\tr2\t2\t-
chr10\t2000\t10000\tr3\t3\t+
" as &[u8];
        let mut reader = Reader::new(data, None);
        let records: Vec<BED<6>> = reader
            .records::<BED<6>>()
            .map(|record| record.unwrap())
            .collect();
        assert_eq!(records.len(), 3);
    }

    /// Lines starting with the configured prefix are not yielded, including
    /// several in a row.
    #[test]
    fn test_read_record_skips_prefixed_lines() {
        let data = b"\
#header
chr1\t200\t1000\tr1\t100\t+
#comment
#comment
chr2\t220\t2000\tr2\t2\t-
" as &[u8];
        let reader = Reader::new(data, Some("#".to_string()));
        let count = reader
            .into_records::<BED<6>>()
            .map(|record| record.unwrap())
            .count();
        assert_eq!(count, 2);
    }

    /// A record is written through `Display` and terminated with a newline.
    #[test]
    fn test_write_record() -> Result<(), Box<dyn std::error::Error>> {
        let mut writer = Writer::new(Vec::new());
        let record: BED<3> = "sq0\t8\t13".parse().unwrap();
        writer.write_record(&record)?;
        assert_eq!(writer.inner, b"sq0\t8\t13\n");
        Ok(())
    }

    /// A narrowPeak line survives a parse/write round trip unchanged.
    #[test]
    fn test_io_narrowpeak() -> Result<(), Box<dyn std::error::Error>> {
        let input = "chr1\t9356548\t9356648\t.\t0\t.\t182\t5.0945\t-1\t50";
        let mut writer = Writer::new(Vec::new());
        let record: NarrowPeak = input.parse().unwrap();
        writer.write_record(&record)?;
        assert_eq!(
            std::str::from_utf8(writer.inner.as_slice()).unwrap(),
            input.to_string() + "\n",
        );
        Ok(())
    }

    /// A bedGraph line survives a parse/write round trip unchanged.
    #[test]
    fn test_io_bedgraph() -> Result<(), Box<dyn std::error::Error>> {
        let input = "chr1\t9356548\t9356648\t50.1";
        let mut writer = Writer::new(Vec::new());
        let record: BedGraph<f32> = input.parse().unwrap();
        writer.write_record(&record)?;
        assert_eq!(
            std::str::from_utf8(writer.inner.as_slice()).unwrap(),
            input.to_string() + "\n",
        );
        Ok(())
    }
}
255
256