1use std::io::BufRead;
4use std::sync::Arc;
5
6use lightmotif::abc::Alphabet;
7use lightmotif::abc::Background;
8use lightmotif::abc::Symbol;
9use lightmotif::dense::DenseMatrix;
10use lightmotif::num::Unsigned;
11use lightmotif::pwm::FrequencyMatrix;
12
13use crate::error::Error;
14
15mod parse;
16
17pub struct Record<A: Alphabet> {
21 alength: Option<usize>,
22 w: Option<usize>,
23 nsites: Option<usize>,
24 evalue: Option<f32>,
25 id: String,
26 name: Option<String>,
27 matrix: FrequencyMatrix<A>,
28 url: Option<String>,
29 background: Option<Background<A>>,
30}
31
32impl<A: Alphabet> Record<A> {
33 pub fn background(&self) -> Option<&Background<A>> {
35 self.background.as_ref()
36 }
37
38 pub fn id(&self) -> &str {
40 &self.id
41 }
42
43 pub fn name(&self) -> Option<&str> {
45 self.name.as_ref().map(String::as_ref)
46 }
47
48 pub fn url(&self) -> Option<&str> {
50 self.url.as_ref().map(String::as_ref)
51 }
52
53 pub fn matrix(&self) -> &FrequencyMatrix<A> {
55 &self.matrix
56 }
57
58 pub fn into_matrix(self) -> FrequencyMatrix<A> {
60 self.matrix
61 }
62}
63
64impl<A: Alphabet> AsRef<FrequencyMatrix<A>> for Record<A> {
65 fn as_ref(&self) -> &FrequencyMatrix<A> {
66 &self.matrix
67 }
68}
69
70pub struct Reader<B: BufRead, A: Alphabet> {
74 buffer: String,
75 bufread: B,
76 meme_version: String,
77 symbols: Vec<A::Symbol>,
78 background: Option<Background<A>>,
79 error: Option<crate::error::Error>,
80 _alphabet: std::marker::PhantomData<A>,
81}
82
83impl<B: BufRead, A: Alphabet> Reader<B, A> {
84 pub fn new(mut reader: B) -> Self {
85 let mut buffer = String::new();
86 let mut meme_version = None;
87 let mut error = None;
88 let mut background = None;
89 let mut symbols = None;
90
91 macro_rules! read_line {
92 () => {
93 match reader.read_line(&mut buffer) {
94 Err(e) => {
95 error = Some(crate::error::Error::Io(Arc::new(e)));
96 break;
97 }
98 Ok(0) => {
99 error = Some(crate::error::Error::InvalidData(None)); break;
101 }
102 _ => (),
103 }
104 };
105 }
106
107 while !buffer.starts_with("MOTIF") {
109 match self::parse::meme_version(&buffer) {
111 Err(e) => buffer.clear(),
112 Ok((_, v)) if meme_version.is_none() => {
113 meme_version = Some(v.to_string());
114 buffer.clear();
115 break;
116 }
117 Ok(_) => {
118 error = Some(crate::error::Error::InvalidData(Some(String::from(
119 "multiple MEME versions found",
120 ))));
121 break;
122 }
123 }
124 read_line!();
126 }
127
128 if meme_version.is_none() {
130 error = Some(crate::error::Error::InvalidData(Some(String::from(
131 "no MEME version found",
132 ))));
133 meme_version = Some(String::new());
134 }
135
136 if error.is_none() {
138 while !buffer.starts_with("MOTIF") && error.is_none() {
139 if buffer.starts_with("Background letter frequencies") {
141 loop {
142 read_line!();
143 match self::parse::background::<A>(&buffer) {
144 Err(nom::Err::Incomplete(_)) => {
145 read_line!();
146 }
147 Err(e) => {
148 error = Some(e.into());
149 break;
150 }
151 Ok((_, bg)) => {
152 background = Some(bg);
153 buffer.clear();
154 break;
155 }
156 }
157 }
158 }
159 if buffer.starts_with("ALPHABET") {
161 match self::parse::alphabet::<A>(&buffer) {
162 Err(e) => {
163 error = Some(e.into());
164 break;
165 }
166 Ok((_, s)) => {
167 symbols = Some(s);
168 buffer.clear();
169 }
170 }
171 }
172 buffer.clear();
174 read_line!();
175 }
176 }
177
178 if symbols.is_none() {
180 let mut s = A::symbols().to_vec();
181 s.as_mut_slice()[..A::K::USIZE - 1].sort_by(|x, y| x.as_char().cmp(&y.as_char()));
182 symbols = Some(s);
183 }
184
185 Self {
186 bufread: reader,
187 buffer,
188 meme_version: meme_version.unwrap(),
189 error,
190 background,
191 symbols: symbols.unwrap(),
192 _alphabet: std::marker::PhantomData,
193 }
194 }
195
196 pub fn meme_version(&self) -> Result<&str, Error> {
198 if let Some(e) = self.error.as_ref() {
199 Err(e.clone())
200 } else {
201 Ok(&self.meme_version)
202 }
203 }
204
205 pub fn background(&self) -> Result<Option<&Background<A>>, Error> {
207 if let Some(e) = self.error.as_ref() {
208 Err(e.clone())
209 } else {
210 Ok(self.background.as_ref())
211 }
212 }
213}
214
215impl<B: BufRead, A: Alphabet> Iterator for Reader<B, A> {
216 type Item = Result<Record<A>, Error>;
217 fn next(&mut self) -> Option<Self::Item> {
218 let mut motif: Option<Record<A>> = None;
219
220 if let Some(err) = self.error.as_ref() {
221 return Some(Err(err.clone()));
222 }
223
224 loop {
225 if self.buffer.starts_with("MOTIF") {
226 if let Some(m) = motif {
227 return Some(Ok(m));
228 }
229 match self::parse::motif(&self.buffer) {
230 Err(e) => return Some(Err(e.into())),
231 Ok((_, (id, name))) => {
232 motif = Some(Record {
233 alength: None,
234 w: None,
235 nsites: None,
236 evalue: None,
237 id: id.to_string(),
238 name: name.map(String::from),
239 url: None,
240 matrix: FrequencyMatrix::new(DenseMatrix::new(0)).unwrap(),
241 background: self.background.clone(),
242 });
243 }
244 }
245 self.buffer.clear();
246 } else if self.buffer.starts_with("letter-probability matrix") {
247 self.buffer.clear(); let mut rows = Vec::new();
249 loop {
250 match self.bufread.read_line(&mut self.buffer) {
251 Err(e) => return Some(Err(e.into())),
252 Ok(0) => break,
253 Ok(n) => (),
254 };
255 match self::parse::motif_row::<A>(&self.buffer, &self.symbols) {
256 Ok((_, row)) => rows.push(row),
257 Err(e) => break, }
259 self.buffer.clear();
260 }
261 if let Some(m) = motif.as_mut() {
262 m.matrix = FrequencyMatrix::new(DenseMatrix::from_rows(rows)).unwrap();
263 } else {
264 return Some(Err(crate::error::Error::InvalidData(Some(String::from(
265 "motif data before declared motif",
266 )))));
267 }
268 } else if self.buffer.starts_with("URL") {
269 let url = match self::parse::url(&self.buffer) {
270 Ok((_, url)) => url,
271 Err(e) => return Some(Err(e.into())),
272 };
273 if let Some(m) = motif.as_mut() {
274 m.url = Some(url.into());
275 } else {
276 return Some(Err(crate::error::Error::InvalidData(Some(String::from(
277 "motif data before declared motif",
278 )))));
279 }
280 self.buffer.clear();
281 } else {
282 self.buffer.clear();
283 }
284
285 if self.buffer.is_empty() {
286 match self.bufread.read_line(&mut self.buffer) {
287 Err(e) => return Some(Err(e.into())),
288 Ok(0) => return motif.map(Ok),
289 Ok(_) => (),
290 }
291 }
292 }
293 }
294}
295
296pub fn read<B: BufRead, A: Alphabet>(reader: B) -> self::Reader<B, A> {
298 self::Reader::new(reader)
299}
300
301#[cfg(test)]
302mod tests {
303 use lightmotif::abc::{Dna, Nucleotide, Symbol};
304
305 #[test]
306 fn record() {
307 const TEXT: &'static str = concat!(
308 "MEME version 4\n",
309 "MOTIF MA0004.1 Arnt\n",
310 "letter-probability matrix: alength= 4 w= 6 nsites= 20 E= 0\n",
311 " 0.200000 0.800000 0.000000 0.000000\n",
312 " 0.950000 0.000000 0.050000 0.000000\n",
313 " 0.000000 1.000000 0.000000 0.000000\n",
314 " 0.000000 0.000000 1.000000 0.000000\n",
315 " 0.000000 0.000000 0.000000 1.000000\n",
316 " 0.000000 0.000000 1.000000 0.000000\n",
317 "URL http://jaspar.genereg.net/matrix/MA0004.1\n",
318 );
319 let mut reader = super::Reader::<_, Dna>::new(std::io::Cursor::new(TEXT));
320 let record = reader.next().unwrap().unwrap();
321 assert_eq!(&record.id, "MA0004.1");
322 assert_eq!(
323 record.url().unwrap(),
324 "http://jaspar.genereg.net/matrix/MA0004.1"
325 );
326 assert!(reader.next().is_none());
327
328 assert_eq!(
329 record.matrix().matrix()[0][Nucleotide::A.as_index()],
330 0.200000
331 );
332 assert_eq!(
333 record.matrix().matrix()[0][Nucleotide::C.as_index()],
334 0.800000
335 );
336 assert_eq!(
337 record.matrix().matrix()[0][Nucleotide::G.as_index()],
338 0.000000
339 );
340 assert_eq!(
341 record.matrix().matrix()[0][Nucleotide::T.as_index()],
342 0.000000
343 );
344
345 assert_eq!(
346 record.matrix().matrix()[1][Nucleotide::A.as_index()],
347 0.950000
348 );
349 assert_eq!(
350 record.matrix().matrix()[1][Nucleotide::C.as_index()],
351 0.000000
352 );
353 assert_eq!(
354 record.matrix().matrix()[1][Nucleotide::G.as_index()],
355 0.050000
356 );
357 assert_eq!(
358 record.matrix().matrix()[1][Nucleotide::T.as_index()],
359 0.000000
360 );
361
362 assert_eq!(
363 record.matrix().matrix()[3][Nucleotide::A.as_index()],
364 0.000000
365 );
366 assert_eq!(
367 record.matrix().matrix()[3][Nucleotide::C.as_index()],
368 0.000000
369 );
370 assert_eq!(
371 record.matrix().matrix()[3][Nucleotide::G.as_index()],
372 1.000000
373 );
374 assert_eq!(
375 record.matrix().matrix()[3][Nucleotide::T.as_index()],
376 0.000000
377 );
378 }
379}