lightmotif_io/jaspar16/
mod.rs1use std::io::BufRead;
19
20use lightmotif::abc::Alphabet;
21use lightmotif::pwm::CountMatrix;
22
23use crate::error::Error;
24
25mod parse;
26
27#[derive(Debug, Clone)]
31pub struct Record<A: Alphabet> {
32 id: String,
33 description: Option<String>,
34 matrix: CountMatrix<A>,
35}
36
37impl<A: Alphabet> Record<A> {
38 pub fn id(&self) -> &str {
40 &self.id
41 }
42
43 pub fn description(&self) -> Option<&str> {
45 self.description.as_deref()
46 }
47
48 pub fn matrix(&self) -> &CountMatrix<A> {
50 &self.matrix
51 }
52
53 pub fn into_matrix(self) -> CountMatrix<A> {
55 self.matrix
56 }
57}
58
59impl<A: Alphabet> AsRef<CountMatrix<A>> for Record<A> {
60 fn as_ref(&self) -> &CountMatrix<A> {
61 &self.matrix
62 }
63}
64
65pub struct Reader<B: BufRead, A: Alphabet> {
69 buffer: Vec<u8>,
70 bufread: B,
71 start: usize,
72 _alphabet: std::marker::PhantomData<A>,
73}
74
75impl<B: BufRead, A: Alphabet> Reader<B, A> {
76 pub fn new(mut reader: B) -> Self {
77 let mut buffer = Vec::new();
78 let start = reader.read_until(b'>', &mut buffer).unwrap_or(1) - 1;
79
80 Self {
81 bufread: reader,
82 buffer,
83 start,
84 _alphabet: std::marker::PhantomData,
85 }
86 }
87}
88
89impl<B: BufRead, A: Alphabet> Iterator for Reader<B, A> {
90 type Item = Result<Record<A>, Error>;
91 fn next(&mut self) -> Option<Self::Item> {
92 match self.bufread.read_until(b'>', &mut self.buffer) {
93 Ok(n) => {
94 let bytes = if n == 0 {
95 &self.buffer[self.start..]
96 } else {
97 &self.buffer[self.start..=self.start + n]
98 };
99 let text = match std::str::from_utf8(bytes) {
100 Ok(text) => text,
101 Err(_) => {
102 return Some(Err(Error::from(std::io::Error::new(
103 std::io::ErrorKind::InvalidData,
104 "decoding error",
105 ))));
106 }
107 };
108 if n == 0 && text.trim().is_empty() {
109 return None;
110 }
111 let (rest, record) = match self::parse::record::<A>(text) {
112 Err(e) => return Some(Err(Error::from(e))),
113 Ok((rest, record)) => (rest, record),
114 };
115 self.start += n + 1 - rest.len();
116 if self.start > self.buffer.capacity() / 2 {
117 let n = self.buffer.len();
118 self.buffer.copy_within(self.start.., 0);
119 self.buffer.truncate(n - self.start);
120 self.start = 0;
121 }
122 Some(Ok(record))
123 }
124 Err(e) => Some(Err(Error::from(e))),
125 }
126 }
127}
128
129pub fn read<B: BufRead, A: Alphabet>(reader: B) -> self::Reader<B, A> {
131 self::Reader::new(reader)
132}
133
#[cfg(test)]
mod test {

    use lightmotif::Dna;

    /// Wraps an in-memory document in a DNA reader.
    fn reader_over(text: &'static str) -> super::Reader<std::io::Cursor<&'static str>, Dna> {
        super::Reader::new(std::io::Cursor::new(text))
    }

    /// A document holding one record yields exactly that record.
    #[test]
    fn test_single() {
        let mut records = reader_over(concat!(
            ">MA0001.1 RUNX1\n",
            "A [10 12 4 1 2 2 0 0 0 8 13 ]\n",
            "C [ 2 2 7 1 0 8 0 0 1 2 2 ]\n",
            "G [ 3 1 1 0 23 0 26 26 0 0 4 ]\n",
            "T [11 11 14 24 1 16 0 0 25 16 7 ]\n",
        ));

        let first = records.next().unwrap().unwrap();
        assert_eq!(first.id(), "MA0001.1");

        assert!(records.next().is_none());
    }

    /// Two consecutive records are yielded in order, then the reader ends.
    #[test]
    fn test_multi() {
        let mut records = reader_over(concat!(
            ">MA0001.1 RUNX1\n",
            "A [10 12 4 1 2 2 0 0 0 8 13 ]\n",
            "C [ 2 2 7 1 0 8 0 0 1 2 2 ]\n",
            "G [ 3 1 1 0 23 0 26 26 0 0 4 ]\n",
            "T [11 11 14 24 1 16 0 0 25 16 7 ]\n",
            ">MA0002.1 RUNX1\n",
            "A [10 12 4 1 2 2 0 0 0 8 13 ]\n",
            "C [ 2 2 7 1 0 8 0 0 1 2 2 ]\n",
            "G [ 3 1 1 0 23 0 26 26 0 0 4 ]\n",
            "T [11 11 14 24 1 16 0 0 25 16 7 ]\n",
        ));

        let first = records.next().unwrap().unwrap();
        assert_eq!(first.id(), "MA0001.1");

        let second = records.next().unwrap().unwrap();
        assert_eq!(second.id(), "MA0002.1");

        assert!(records.next().is_none());
    }
}