// chainfile/alignment/section/header.rs
pub mod sequence;
4
5use std::num::ParseIntError;
6use std::str::FromStr;
7
8use omics::coordinate::position::Number;
9pub use sequence::Sequence;
10
/// The literal token that must appear as the first field of a header line.
pub const HEADER_PREFIX: &str = "chain";

/// The character that separates adjacent fields of a header line.
pub const DELIMITER: char = ' ';

/// The exact number of delimiter-separated fields a header line must contain.
pub const NUM_HEADER_FIELDS: usize = 13;
19
20#[derive(Debug)]
26pub enum ParseError {
27 IncorrectNumberOfFields(usize),
29
30 InvalidPrefix(String),
32
33 InvalidScore(ParseIntError),
35
36 InvalidReferenceSequence(sequence::Error),
38
39 InvalidQuerySequence(sequence::Error),
41
42 InvalidId(ParseIntError),
44
45 EndPositionExceedsSize(String, Number, Number),
47}
48
49impl std::fmt::Display for ParseError {
50 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51 match self {
52 ParseError::IncorrectNumberOfFields(fields) => write!(
53 f,
54 "invalid number of fields in header: expected {} fields, found {} fields",
55 NUM_HEADER_FIELDS, fields
56 ),
57 ParseError::InvalidPrefix(prefix) => {
58 write!(
59 f,
60 "invalid prefix: expected \"{}\", found \"{}\"",
61 HEADER_PREFIX, prefix
62 )
63 }
64 ParseError::InvalidScore(err) => write!(f, "invalid score: {}", err),
65 ParseError::InvalidReferenceSequence(err) => {
66 write!(f, "invalid reference sequence: {}", err)
67 }
68 ParseError::InvalidQuerySequence(err) => write!(f, "invalid query sequence: {}", err),
69 ParseError::InvalidId(err) => write!(f, "invalid id: {}", err),
70 ParseError::EndPositionExceedsSize(chrom, pos, size) => write!(
71 f,
72 "the end position ({}) exceeds the size of the chromosome `{}` ({})",
73 pos, chrom, size
74 ),
75 }
76 }
77}
78
79impl std::error::Error for ParseError {}
80
81#[derive(Debug)]
83pub enum Error {
84 Parse(ParseError),
86}
87
88impl std::fmt::Display for Error {
89 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
90 match self {
91 Error::Parse(err) => write!(f, "parse error: {err}"),
92 }
93 }
94}
95
96impl std::error::Error for Error {}
97
98type Result<T> = std::result::Result<T, Error>;
100
101#[derive(Clone, Debug, Eq, PartialEq)]
107pub struct Record {
108 score: usize,
110
111 reference_sequence: Sequence,
113
114 query_sequence: Sequence,
116
117 id: usize,
119}
120
121impl Record {
122 pub fn score(&self) -> usize {
135 self.score
136 }
137
138 pub fn reference_sequence(&self) -> &Sequence {
157 &self.reference_sequence
158 }
159
160 pub fn query_sequence(&self) -> &Sequence {
179 &self.query_sequence
180 }
181
182 pub fn id(&self) -> usize {
196 self.id
197 }
198}
199
200impl FromStr for Record {
201 type Err = Error;
202
203 fn from_str(s: &str) -> Result<Self> {
204 let parts = s.split(DELIMITER).collect::<Vec<_>>();
205 if parts.len() != NUM_HEADER_FIELDS {
206 return Err(Error::Parse(ParseError::IncorrectNumberOfFields(
207 parts.len(),
208 )));
209 }
210
211 let chain = parts[0];
212 if chain != HEADER_PREFIX {
213 return Err(Error::Parse(ParseError::InvalidPrefix(chain.into())));
214 }
215
216 let score = parts[1]
217 .parse()
218 .map_err(|err| Error::Parse(ParseError::InvalidScore(err)))?;
219 let reference_sequence =
220 Sequence::try_from_str_parts(parts[2], parts[3], parts[4], parts[5], parts[6])
221 .map_err(|err| Error::Parse(ParseError::InvalidReferenceSequence(err)))?;
222 let query_sequence =
223 Sequence::try_from_str_parts(parts[7], parts[8], parts[9], parts[10], parts[11])
224 .map_err(|err| Error::Parse(ParseError::InvalidQuerySequence(err)))?;
225 let id = parts[12]
226 .parse()
227 .map_err(|err| Error::Parse(ParseError::InvalidId(err)))?;
228
229 if reference_sequence.chromosome_size() < reference_sequence.alignment_end() {
230 return Err(Error::Parse(ParseError::EndPositionExceedsSize(
231 reference_sequence.chromosome_name().to_string(),
232 reference_sequence.alignment_end(),
233 reference_sequence.chromosome_size(),
234 )));
235 }
236
237 if query_sequence.chromosome_size() < query_sequence.alignment_end() {
238 return Err(Error::Parse(ParseError::EndPositionExceedsSize(
239 query_sequence.chromosome_name().to_string(),
240 query_sequence.alignment_end(),
241 query_sequence.chromosome_size(),
242 )));
243 }
244
245 Ok(Record {
246 score,
247 reference_sequence,
248 query_sequence,
249 id,
250 })
251 }
252}
253
254impl std::fmt::Display for Record {
255 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
256 write!(
257 f,
258 "{}{}{}{}{}{}{}{}{}",
259 HEADER_PREFIX,
260 DELIMITER,
261 self.score,
262 DELIMITER,
263 self.reference_sequence,
264 DELIMITER,
265 self.query_sequence,
266 DELIMITER,
267 self.id
268 )
269 }
270}
271
#[cfg(test)]
mod tests {
    use omics::coordinate::Strand;

    use super::*;

    /// A well-formed header line used by the success-path tests.
    const VALID_HEADER: &str = "chain 0 seq0 2 + 0 2 seq0 2 - 0 2 1";

    /// Parses `line`, which the caller expects to be rejected, and returns the
    /// resulting error. Panics if the line parses successfully.
    fn parse_failure(line: &str) -> Error {
        line.parse::<Record>().unwrap_err()
    }

    #[test]
    fn parse() {
        let header = VALID_HEADER.parse::<Record>().unwrap();

        assert_eq!(header.score(), 0);

        assert_eq!(header.reference_sequence().chromosome_name(), "seq0");
        assert_eq!(header.reference_sequence().chromosome_size(), 2);
        assert_eq!(header.reference_sequence().strand(), Strand::Positive);
        assert_eq!(header.reference_sequence().alignment_start(), 0);
        assert_eq!(header.reference_sequence().alignment_end(), 2);

        assert_eq!(header.query_sequence().chromosome_name(), "seq0");
        assert_eq!(header.query_sequence().chromosome_size(), 2);
        assert_eq!(header.query_sequence().strand(), Strand::Negative);
        assert_eq!(header.query_sequence().alignment_start(), 0);
        assert_eq!(header.query_sequence().alignment_end(), 2);

        assert_eq!(header.id(), 1);
    }

    #[test]
    fn incorrect_number_of_fields() {
        // The only thing wrong with this line is the missing id field, so the
        // test does not depend on the order in which the parser runs its
        // checks.
        let err = parse_failure("chain 0 seq0 2 + 0 2 seq0 2 - 0 2");

        assert!(matches!(
            err,
            Error::Parse(ParseError::IncorrectNumberOfFields(12))
        ));

        assert_eq!(
            err.to_string(),
            "parse error: invalid number of fields in header: expected 13 fields, found 12 fields"
        );
    }

    #[test]
    fn invalid_prefix() {
        let err = parse_failure("foo 0 seq0 2 + 0 2 seq0 2 - 0 2 1");

        assert!(matches!(err, Error::Parse(ParseError::InvalidPrefix(_))));
        assert_eq!(
            err.to_string(),
            "parse error: invalid prefix: expected \"chain\", found \"foo\""
        );
    }

    #[test]
    fn invalid_score() {
        let err = parse_failure("chain ? seq0 2 + 0 2 seq0 2 - 0 2 1");

        assert!(matches!(err, Error::Parse(ParseError::InvalidScore(_))));
        assert_eq!(
            err.to_string(),
            "parse error: invalid score: invalid digit found in string"
        );
    }

    #[test]
    fn invalid_reference_sequence() {
        let err = parse_failure("chain 0 seq0 ? + 0 2 seq0 2 - 0 2 1");

        assert!(matches!(
            err,
            Error::Parse(ParseError::InvalidReferenceSequence(_))
        ));

        assert_eq!(
            err.to_string(),
            "parse error: invalid reference sequence: parse error: invalid chromosome size: \
             invalid digit found in string"
        );
    }

    #[test]
    fn invalid_query_sequence() {
        let err = parse_failure("chain 0 seq0 2 + 0 2 seq0 ? - 0 2 1");

        assert!(matches!(
            err,
            Error::Parse(ParseError::InvalidQuerySequence(_))
        ));

        assert_eq!(
            err.to_string(),
            "parse error: invalid query sequence: parse error: invalid chromosome size: invalid \
             digit found in string"
        );
    }

    #[test]
    fn invalid_id() {
        let err = parse_failure("chain 0 seq0 2 + 0 2 seq0 2 - 0 2 ?");

        assert!(matches!(err, Error::Parse(ParseError::InvalidId(_))));
        assert_eq!(
            err.to_string(),
            "parse error: invalid id: invalid digit found in string"
        );
    }

    #[test]
    fn end_is_greater_than_size_reference() {
        let err = parse_failure("chain 0 seq0 2 + 0 3 seq0 2 - 0 1 1");

        assert!(matches!(
            err,
            Error::Parse(ParseError::EndPositionExceedsSize(_, _, _))
        ));

        assert_eq!(
            err.to_string(),
            "parse error: the end position (3) exceeds the size of the chromosome `seq0` (2)"
        );
    }

    #[test]
    fn end_is_greater_than_size_query() {
        let err = parse_failure("chain 0 seq0 2 + 0 1 seq0 2 - 0 3 1");

        assert!(matches!(
            err,
            Error::Parse(ParseError::EndPositionExceedsSize(_, _, _))
        ));

        assert_eq!(
            err.to_string(),
            "parse error: the end position (3) exceeds the size of the chromosome `seq0` (2)"
        );
    }

    #[test]
    fn display() {
        let header = VALID_HEADER.parse::<Record>().unwrap();

        assert_eq!(header.to_string(), VALID_HEADER);
    }
}