lightmotif_io/transfac/
reader.rs

1use std::io::BufRead;
2use std::marker::PhantomData;
3
4use lightmotif::Alphabet;
5
6use super::Record;
7use crate::error::Error;
8
9/// A reader for TRANSFAC-formatted files.
10#[derive(Debug)]
11pub struct Reader<B: BufRead, A: Alphabet> {
12    buffer: String,
13    bufread: B,
14    last: usize,
15    version: Option<String>,
16    error: Option<Error>,
17    _alphabet: PhantomData<A>,
18}
19
20impl<B: BufRead, A: Alphabet> Reader<B, A> {
21    pub fn new(reader: B) -> Self {
22        let mut reader = Self {
23            bufread: reader,
24            buffer: String::new(),
25            last: 0,
26            error: None,
27            version: None,
28            _alphabet: PhantomData,
29        };
30
31        let mut end = false;
32        while !end {
33            match reader.bufread.read_line(&mut reader.buffer) {
34                Err(e) => {
35                    reader.error = Some(Error::from(e));
36                    break;
37                }
38                Ok(0) => {
39                    break;
40                }
41                Ok(n) => {
42                    end = reader.buffer[reader.last..].starts_with("//");
43                    if !end {
44                        reader.last += n;
45                    }
46                }
47            }
48        }
49
50        if reader.buffer.starts_with("VV") {
51            match super::parse::parse_version(&reader.buffer) {
52                Err(e) => {
53                    reader.error = Some(Error::from(e));
54                }
55                Ok((rest, version)) => {
56                    reader.version = Some(version.trim().to_string());
57                    reader.last = 0;
58                    reader.buffer.clear();
59                }
60            }
61        }
62
63        reader
64    }
65}
66
67impl<B: BufRead, A: Alphabet> Iterator for Reader<B, A> {
68    type Item = Result<Record<A>, Error>;
69    fn next(&mut self) -> Option<Self::Item> {
70        if let Some(err) = self.error.take() {
71            return Some(Err(err));
72        }
73
74        let mut end = self.buffer[self.last..].starts_with("//");
75        while !end {
76            match self.bufread.read_line(&mut self.buffer) {
77                Err(e) => return Some(Err(Error::from(e))),
78                Ok(0) => break,
79                Ok(n) => {
80                    end = self.buffer[self.last..].starts_with("//");
81                    self.last += n;
82                }
83            }
84        }
85
86        if !self.buffer.is_empty() {
87            let record = match super::parse::parse_record::<A>(&self.buffer) {
88                Err(e) => return Some(Err(Error::from(e))),
89                Ok(x) => x.1,
90            };
91            self.buffer.clear();
92            self.last = 0;
93            Some(Ok(record))
94        } else {
95            None
96        }
97    }
98}
99
#[cfg(test)]
mod test {

    use lightmotif::Dna;

    /// Reader type used by every test: DNA records read from a static string.
    type TextReader = super::Reader<std::io::Cursor<&'static str>, Dna>;

    /// Build a DNA reader over an in-memory TRANSFAC document.
    fn open(text: &'static str) -> TextReader {
        super::Reader::new(std::io::Cursor::new(text))
    }

    /// Check that `text` holds exactly the records with the given identifiers,
    /// in order, and nothing after them.
    fn assert_ids(text: &'static str, expected: &[&str]) {
        let mut records = open(text);
        for id in expected {
            let record = records.next().unwrap().unwrap();
            assert_eq!(record.id, Some(String::from(*id)));
        }
        assert!(records.next().is_none());
    }

    /// A single record preceded by a version header.
    #[test]
    fn test_single_version() {
        let mut records = open(concat!(
            "VV  TRANSFAC MATRIX TABLE, Release 9.2 - licensed - 2005-06-30, (C) Biobase GmbH\n",
            "XX\n",
            "//\n",
            "AC  M00001\n",
            "XX\n",
            "P0      A      C      G      T\n",
            "01      1      2      2      0      S\n",
            "02      2      1      2      0      R\n",
            "03      3      0      1      1      A\n",
            "04      0      5      0      0      C\n",
            "05      5      0      0      0      A\n",
            "06      0      0      4      1      G\n",
            "07      0      1      4      0      G\n",
            "08      0      0      0      5      T\n",
            "09      0      0      5      0      G\n",
            "10      0      1      2      2      K\n",
            "11      0      2      0      3      Y\n",
            "12      1      0      3      1      G\n",
            "//\n",
        ));

        let expected_version =
            "TRANSFAC MATRIX TABLE, Release 9.2 - licensed - 2005-06-30, (C) Biobase GmbH";
        assert_eq!(records.version, Some(String::from(expected_version)));

        let first = records.next().unwrap().unwrap();
        assert_eq!(first.accession, Some(String::from("M00001")));
    }

    /// A single record with no version header in front of it.
    #[test]
    fn test_single_noversion() {
        let mut records = open(concat!(
            "AC  M00030\n",
            "XX\n",
            "P0      A      C      G      T\n",
            "01      0      1      1     12      T\n",
            "02      0      0     14      0      G\n",
            "03     14      0      0      0      A\n",
            "04      0      0      0     14      T\n",
            "05      0      0     14      0      G\n",
            "06      1      2      0     11      T\n",
            "07     10      0      3      1      A\n",
            "08      6      2      4      2      N\n",
            "09      5      4      1      4      N\n",
            "10      2      1      1     10      T\n",
            "//\n",
        ));

        let first = records.next().unwrap().unwrap();
        assert_eq!(records.version, None);
        assert_eq!(first.accession, Some(String::from("M00030")));
    }

    /// Two records preceded by a version header.
    #[test]
    fn test_multi_version() {
        assert_ids(
            concat!(
                "VV  TRANSFAC MATRIX TABLE, Release 2.2\n",
                "XX\n",
                "//\n",
                "ID  prodoric_MX000001\n",
                "BF  Pseudomonas aeruginosa\n",
                "P0      A      T      G      C\n",
                "00      0      0      2      0      G\n",
                "01      0      2      0      0      T\n",
                "02      0      2      0      0      T\n",
                "03      0      0      2      0      G\n",
                "04      2      0      0      0      A\n",
                "05      0      1      0      1      y\n",
                "06      0      0      0      2      C\n",
                "07      0      1      0      1      y\n",
                "08      1      1      0      0      w\n",
                "09      1      0      1      0      r\n",
                "10      0      2      0      0      T\n",
                "11      0      0      0      2      C\n",
                "12      2      0      0      0      A\n",
                "13      2      0      0      0      A\n",
                "14      0      0      0      2      C\n",
                "XX\n",
                "//\n",
                "ID prodoric_MX000003\n",
                "BF Escherichia coli\n",
                "P0      A      T      G      C\n",
                "00      2     65      0      2      t\n",
                "01     64      0      3      2      a\n",
                "02     12     11      1     45      c\n",
                "03      5     29      5     30      n\n",
                "04      8     18     11     32      n\n",
                "05     34     12      0     23      h\n",
                "06     12     43      4     10      t\n",
                "XX\n",
                "//\n",
            ),
            &["prodoric_MX000001", "prodoric_MX000003"],
        );
    }

    /// Two records with no version header in front of them.
    #[test]
    fn test_multi_noversion() {
        assert_ids(
            concat!(
                "ID  prodoric_MX000001\n",
                "BF  Pseudomonas aeruginosa\n",
                "P0      A      T      G      C\n",
                "00      0      0      2      0      G\n",
                "01      0      2      0      0      T\n",
                "02      0      2      0      0      T\n",
                "03      0      0      2      0      G\n",
                "04      2      0      0      0      A\n",
                "05      0      1      0      1      y\n",
                "06      0      0      0      2      C\n",
                "07      0      1      0      1      y\n",
                "08      1      1      0      0      w\n",
                "09      1      0      1      0      r\n",
                "10      0      2      0      0      T\n",
                "11      0      0      0      2      C\n",
                "12      2      0      0      0      A\n",
                "13      2      0      0      0      A\n",
                "14      0      0      0      2      C\n",
                "XX\n",
                "//\n",
                "ID prodoric_MX000003\n",
                "BF Escherichia coli\n",
                "P0      A      T      G      C\n",
                "00      2     65      0      2      t\n",
                "01     64      0      3      2      a\n",
                "02     12     11      1     45      c\n",
                "03      5     29      5     30      n\n",
                "04      8     18     11     32      n\n",
                "05     34     12      0     23      h\n",
                "06     12     43      4     10      t\n",
                "XX\n",
                "//\n",
            ),
            &["prodoric_MX000001", "prodoric_MX000003"],
        );
    }
}