lightmotif_io/jaspar/
mod.rs1use std::io::BufRead;
19
20use lightmotif::abc::Dna;
21use lightmotif::pwm::CountMatrix;
22
23use crate::error::Error;
24
25mod parse;
26
27#[derive(Debug, Clone)]
34pub struct Record {
35 id: String,
36 description: Option<String>,
37 matrix: CountMatrix<Dna>,
38}
39
40impl Record {
41 pub fn id(&self) -> &str {
43 &self.id
44 }
45
46 pub fn description(&self) -> Option<&str> {
48 self.description.as_deref()
49 }
50
51 pub fn matrix(&self) -> &CountMatrix<Dna> {
53 &self.matrix
54 }
55}
56
57impl AsRef<CountMatrix<Dna>> for Record {
58 fn as_ref(&self) -> &CountMatrix<Dna> {
59 &self.matrix
60 }
61}
62
63impl From<Record> for CountMatrix<Dna> {
64 fn from(value: Record) -> Self {
65 value.matrix
66 }
67}
68
/// An iterator-style reader that extracts records from a buffered byte source.
///
/// Input is accumulated in `buffer` one `>`-delimited chunk at a time, and
/// `start` is the offset inside `buffer` at which the next unparsed record
/// begins.
pub struct Reader<B: BufRead> {
    /// Bytes read from `bufread` that have not been discarded yet.
    buffer: Vec<u8>,
    /// The underlying buffered source.
    bufread: B,
    /// Offset in `buffer` of the first byte of the next record to parse.
    start: usize,
}

impl<B: BufRead> Reader<B> {
    /// Creates a new reader over `reader`.
    ///
    /// The source is read up to and including the first `>` byte, and the
    /// cursor is placed on the last byte read (the `>` itself when one was
    /// found). An I/O error during this initial read is not reported here:
    /// the cursor is simply left at offset 0.
    pub fn new(mut reader: B) -> Self {
        let mut buffer = Vec::new();
        // `read_until` returns `Ok(0)` for an empty source; a plain `- 1`
        // would then underflow (panic in debug builds, a wrapped offset in
        // release builds), so the subtraction saturates at 0 instead.
        let start = reader
            .read_until(b'>', &mut buffer)
            .unwrap_or(1)
            .saturating_sub(1);

        Self {
            bufread: reader,
            buffer,
            start,
        }
    }
}
91
92impl<B: BufRead> Iterator for Reader<B> {
93 type Item = Result<Record, Error>;
94 fn next(&mut self) -> Option<Self::Item> {
95 match self.bufread.read_until(b'>', &mut self.buffer) {
96 Ok(n) => {
97 let bytes = if n == 0 {
98 &self.buffer[self.start..]
99 } else {
100 &self.buffer[self.start..=self.start + n]
101 };
102 let text = match std::str::from_utf8(bytes) {
103 Ok(text) => text,
104 Err(_) => {
105 return Some(Err(Error::from(std::io::Error::new(
106 std::io::ErrorKind::InvalidData,
107 "decoding error",
108 ))));
109 }
110 };
111 if n == 0 && text.trim().is_empty() {
112 return None;
113 }
114 let (rest, record) = match self::parse::record(text) {
115 Err(e) => return Some(Err(Error::from(e))),
116 Ok((rest, record)) => (rest, record),
117 };
118 self.start += n + 1 - rest.len();
119 if self.start > self.buffer.capacity() / 2 {
120 let n = self.buffer.len();
121 self.buffer.copy_within(self.start.., 0);
122 self.buffer.truncate(n - self.start);
123 self.start = 0;
124 }
125 Some(Ok(record))
126 }
127 Err(e) => Some(Err(Error::from(e))),
128 }
129 }
130}
131
132pub fn read<B: BufRead>(reader: B) -> self::Reader<B> {
134 self::Reader::new(reader)
135}
136
#[cfg(test)]
mod test {
    use super::Reader;
    use std::io::Cursor;

    /// A source holding exactly one record yields that record, then ends.
    #[test]
    fn test_single() {
        let input = concat!(
            ">MA1104.2 GATA6\n",
            "22320 20858 35360 5912 4535 2560 5044 76686 1507 1096 13149 18911 22172\n",
            "16229 14161 13347 11831 62936 1439 1393 815 852 75930 3228 19054 17969\n",
            "13432 11894 10394 7066 6459 580 615 819 456 712 1810 18153 11605\n",
            "27463 32531 20343 54635 5514 74865 72392 1124 76629 1706 61257 23326 27698\n",
        );
        let mut records = Reader::new(Cursor::new(input));

        // The first item must exist and must have parsed successfully.
        let first = records.next().unwrap().unwrap();
        assert_eq!(&first.id, "MA1104.2");

        // Nothing may follow the only record.
        assert!(records.next().is_none());
    }
}