1use std::collections::HashMap;
2use std::fs::File;
3use std::io::{self, BufRead};
4use std::path::Path;
5
6use crate::{Error, ErrorKind, Result};
7
8#[derive(Debug)]
10pub enum Type {
11 Int(i64),
12 Float(f64),
13 String(String),
14 Char(char),
15}
16
17impl Type {
18 fn parse(field_type: &str, value: &str) -> Option<Self> {
19 match field_type {
20 "i" => value.parse::<i64>().ok().map(Type::Int),
21 "f" => value.parse::<f64>().ok().map(Type::Float),
22 "Z" => Some(Type::String(value.to_string())),
23 "A" => value.chars().next().map(Type::Char),
24 _ => Some(Type::String(value.to_string())), }
26 }
27
28 pub fn get_int(&self) -> Option<&i64> {
30 match self {
31 Type::Int(v) => Some(v),
32 _ => None,
33 }
34 }
35
36 pub fn get_float(&self) -> Option<&f64> {
38 match self {
39 Type::Float(v) => Some(v),
40 _ => None,
41 }
42 }
43
44 pub fn get_string(&self) -> Option<&String> {
46 match self {
47 Type::String(v) => Some(v),
48 _ => None,
49 }
50 }
51
52 pub fn get_char(&self) -> Option<&char> {
54 match self {
55 Type::Char(v) => Some(v),
56 _ => None,
57 }
58 }
59}
60
61#[derive(Debug)]
63#[allow(non_camel_case_types)]
64pub enum Tag {
65 tp(Type),
67 cm(Type),
69 s1(Type),
71 s2(Type),
73 NM(Type),
75 MD(Type),
77 AS(Type),
79 SA(Type),
81 ms(Type),
83 nn(Type),
85 ts(Type),
87 cg(Type),
89 cs(Type),
91 dv(Type),
93 de(Type),
95 rl(Type),
97 zd(Type),
99}
100
101impl Tag {
102 pub fn parse(tag: &str, value: Type) -> Result<Self> {
104 match tag {
105 "tp" => Ok(Tag::tp(value)),
106 "cm" => Ok(Tag::cm(value)),
107 "s1" => Ok(Tag::s1(value)),
108 "s2" => Ok(Tag::s2(value)),
109 "NM" => Ok(Tag::NM(value)),
110 "MD" => Ok(Tag::MD(value)),
111 "AS" => Ok(Tag::AS(value)),
112 "SA" => Ok(Tag::SA(value)),
113 "ms" => Ok(Tag::ms(value)),
114 "nn" => Ok(Tag::nn(value)),
115 "ts" => Ok(Tag::ts(value)),
116 "cg" => Ok(Tag::cg(value)),
117 "cs" => Ok(Tag::cs(value)),
118 "dv" => Ok(Tag::dv(value)),
119 "de" => Ok(Tag::de(value)),
120 "rl" => Ok(Tag::rl(value)),
121 "zd" => Ok(Tag::zd(value)),
122 _ => Err(Error::new(ErrorKind::ReadRecord(format!(
123 "Invalid PAF tag: {}",
124 tag
125 )))),
126 }
127 }
128
129 fn to_string(&self) -> String {
131 match self {
132 Tag::tp(_) => "tp".into(),
133 Tag::cm(_) => "cm".into(),
134 Tag::s1(_) => "s1".into(),
135 Tag::s2(_) => "s2".into(),
136 Tag::NM(_) => "NM".into(),
137 Tag::MD(_) => "MD".into(),
138 Tag::AS(_) => "AS".into(),
139 Tag::SA(_) => "SA".into(),
140 Tag::ms(_) => "ms".into(),
141 Tag::nn(_) => "nn".into(),
142 Tag::ts(_) => "ts".into(),
143 Tag::cg(_) => "cg".into(),
144 Tag::cs(_) => "cs".into(),
145 Tag::dv(_) => "dv".into(),
146 Tag::de(_) => "de".into(),
147 Tag::rl(_) => "rl".into(),
148 Tag::zd(_) => "zd".into(),
149 }
150 }
151}
152
153#[derive(Debug)]
155pub struct PafRecord {
156 query_name: String,
158 query_len: u32,
160 query_start: u32,
162 query_end: u32,
164 strand: char,
166 target_name: String,
168 target_len: u32,
170 target_start: u32,
172 target_end: u32,
174 residue_matches: u32,
176 alignment_block_len: u32,
178 mapping_quality: u8,
180
181 optional: HashMap<String, Tag>,
183}
184
185impl PafRecord {
186 pub fn new(
188 query_name: String,
189 query_len: u32,
190 query_start: u32,
191 query_end: u32,
192 strand: char,
193 target_name: String,
194 target_len: u32,
195 target_start: u32,
196 target_end: u32,
197 residue_matches: u32,
198 alignment_block_len: u32,
199 mapping_quality: u8,
200 optional: HashMap<String, Tag>,
201 ) -> PafRecord {
202 PafRecord {
203 query_name,
204 query_len,
205 query_start,
206 query_end,
207 strand,
208 target_name,
209 target_len,
210 target_start,
211 target_end,
212 residue_matches,
213 alignment_block_len,
214 mapping_quality,
215 optional,
216 }
217 }
218
219 pub fn query_name(&self) -> &str {
221 &self.query_name
222 }
223 pub fn query_len(&self) -> u32 {
225 self.query_len
226 }
227 pub fn query_start(&self) -> u32 {
229 self.query_start
230 }
231 pub fn query_end(&self) -> u32 {
233 self.query_end
234 }
235 pub fn target_name(&self) -> &str {
237 &self.target_name
238 }
239 pub fn target_len(&self) -> u32 {
241 self.target_len
242 }
243 pub fn target_start(&self) -> u32 {
245 self.target_start
246 }
247 pub fn target_end(&self) -> u32 {
249 self.target_end
250 }
251 pub fn residue_matches(&self) -> u32 {
253 self.residue_matches
254 }
255 pub fn alignment_block_len(&self) -> u32 {
257 self.alignment_block_len
258 }
259 pub fn mapping_quality(&self) -> u8 {
261 self.mapping_quality
262 }
263 pub fn strand(&self) -> char {
265 self.strand
266 }
267 pub fn optional_fields(&self) -> &HashMap<String, Tag> {
269 &self.optional
270 }
271 pub fn tp(&self) -> Option<&char> {
273 self.optional.get("tp").map(|tag| match tag {
274 Tag::tp(t) => t.get_char().unwrap(),
275 _ => panic!("Invalid tag"),
276 })
277 }
278 pub fn cm(&self) -> Option<&i64> {
280 self.optional.get("cm").map(|tag| match tag {
281 Tag::cm(t) => t.get_int().unwrap(),
282 _ => panic!("Invalid tag"),
283 })
284 }
285 pub fn s1(&self) -> Option<&i64> {
287 self.optional.get("s1").map(|tag| match tag {
288 Tag::s1(t) => t.get_int().unwrap(),
289 _ => panic!("Invalid tag"),
290 })
291 }
292 pub fn s2(&self) -> Option<&i64> {
294 self.optional.get("s2").map(|tag| match tag {
295 Tag::s2(t) => t.get_int().unwrap(),
296 _ => panic!("Invalid tag"),
297 })
298 }
299 pub fn nm(&self) -> Option<&i64> {
301 self.optional.get("NM").map(|tag| match tag {
302 Tag::NM(t) => t.get_int().unwrap(),
303 _ => panic!("Invalid tag"),
304 })
305 }
306 pub fn md(&self) -> Option<&String> {
308 self.optional.get("MD").map(|tag| match tag {
309 Tag::MD(t) => t.get_string().unwrap(),
310 _ => panic!("Invalid tag"),
311 })
312 }
313 pub fn as_(&self) -> Option<&i64> {
315 self.optional.get("AS").map(|tag| match tag {
316 Tag::AS(t) => t.get_int().unwrap(),
317 _ => panic!("Invalid tag"),
318 })
319 }
320 pub fn sa(&self) -> Option<&String> {
322 self.optional.get("SA").map(|tag| match tag {
323 Tag::SA(t) => t.get_string().unwrap(),
324 _ => panic!("Invalid tag"),
325 })
326 }
327 pub fn ms(&self) -> Option<&i64> {
329 self.optional.get("ms").map(|tag| match tag {
330 Tag::ms(t) => t.get_int().unwrap(),
331 _ => panic!("Invalid tag"),
332 })
333 }
334 pub fn nn(&self) -> Option<&i64> {
336 self.optional.get("nn").map(|tag| match tag {
337 Tag::nn(t) => t.get_int().unwrap(),
338 _ => panic!("Invalid tag"),
339 })
340 }
341 pub fn ts(&self) -> Option<&char> {
343 self.optional.get("ts").map(|tag| match tag {
344 Tag::ts(t) => t.get_char().unwrap(),
345 _ => panic!("Invalid tag"),
346 })
347 }
348 pub fn cg(&self) -> Option<&String> {
350 self.optional.get("cg").map(|tag| match tag {
351 Tag::cg(t) => t.get_string().unwrap(),
352 _ => panic!("Invalid tag"),
353 })
354 }
355 pub fn cs(&self) -> Option<&String> {
357 self.optional.get("cs").map(|tag| match tag {
358 Tag::cs(t) => t.get_string().unwrap(),
359 _ => panic!("Invalid tag"),
360 })
361 }
362 pub fn dv(&self) -> Option<&f64> {
364 self.optional.get("dv").map(|tag| match tag {
365 Tag::dv(t) => t.get_float().unwrap(),
366 _ => panic!("Invalid tag"),
367 })
368 }
369 pub fn de(&self) -> Option<&f64> {
371 self.optional.get("de").map(|tag| match tag {
372 Tag::de(t) => t.get_float().unwrap(),
373 _ => panic!("Invalid tag"),
374 })
375 }
376 pub fn rl(&self) -> Option<&i64> {
378 self.optional.get("rl").map(|tag| match tag {
379 Tag::rl(t) => t.get_int().unwrap(),
380 _ => panic!("Invalid tag"),
381 })
382 }
383}
384
385pub struct Reader<R> {
387 reader: io::BufReader<R>,
388 line: u64,
389}
390
391impl Reader<File> {
392 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Reader<File>> {
394 Ok(Reader::new(File::open(path)?))
395 }
396
397 pub fn from_reader<R: io::Read>(rdr: R) -> Reader<R> {
399 Reader::new(rdr)
400 }
401}
402
403fn parse_optional_fields(fields: &[&str]) -> Result<HashMap<String, Tag>> {
405 let mut map = HashMap::new();
406
407 for field in fields {
409 let parts: Vec<&str> = field.split(':').collect();
410 if parts.len() < 3 {
411 return Err(Error::new(ErrorKind::ReadRecord(
412 "Invalid PAF line: invalid optional field - too few parts".into(),
413 )));
414 }
415
416 let tag = parts[0];
417 let type_ = parts[1];
418 let inner = parts[2];
419
420 let type_ = Type::parse(type_, inner).ok_or_else(|| {
421 Error::new(ErrorKind::ReadRecord(format!(
422 "Invalid PAF line: invalid optional field type: {}",
423 type_
424 )))
425 })?;
426
427 let tag = Tag::parse(tag, type_)?;
428
429 map.insert(tag.to_string(), tag);
430 }
431 Ok(map)
432}
433
434impl<R: io::Read> Reader<R> {
435 pub fn new(rdr: R) -> Self {
437 Reader {
438 reader: io::BufReader::new(rdr),
439 line: 0,
440 }
441 }
442
443 pub fn records(&mut self) -> RecordsIter<R> {
445 RecordsIter::new(self)
446 }
447
448 pub fn into_records(self) -> RecordsIntoIter<R> {
450 RecordsIntoIter::new(self)
451 }
452
453 pub fn read_record(&mut self) -> Result<Option<PafRecord>> {
455 let mut line = String::new();
456 let bytes_read = match self.reader.read_line(&mut line) {
457 Ok(b) => b,
458 Err(e) => return Err(Error::new(ErrorKind::Io(e))),
459 };
460
461 if bytes_read == 0 {
462 return Ok(None); }
464
465 let columns: Vec<&str> = line.trim().split('\t').collect();
466 if columns.len() < 12 {
467 return Err(Error::new(ErrorKind::ReadRecord(format!(
468 "Invalid PAF at line {}: less than 12 mandatory fields",
469 self.line
470 ))));
471 }
472
473 let query_name = columns[0].to_string();
475 let query_len = columns[1].parse::<u32>()?;
476 let query_start = columns[2].parse::<u32>()?;
477 let query_end = columns[3].parse::<u32>()?;
478 let strand = columns[4]
479 .chars()
480 .next()
481 .ok_or_else(|| Error::new(ErrorKind::ReadRecord("Empty strand field".into())))?;
482
483 if strand != '+' && strand != '-' {
484 return Err(Error::new(ErrorKind::ReadRecord(format!(
485 "Invalid strand field at line {}: {}",
486 self.line, strand
487 ))));
488 }
489
490 let target_name = columns[5].to_string();
491 let target_len = columns[6].parse::<u32>()?;
492 let target_start = columns[7].parse::<u32>()?;
493 let target_end = columns[8].parse::<u32>()?;
494 let residue_matches = columns[9].parse::<u32>()?;
495 let alignment_block_len = columns[10].parse::<u32>()?;
496 let mapping_quality = columns[11].parse::<u8>()?;
497
498 let optional = parse_optional_fields(&columns[12..])?;
499
500 let record = PafRecord {
501 query_name,
502 query_len,
503 query_start,
504 query_end,
505 strand,
506 target_name,
507 target_len,
508 target_start,
509 target_end,
510 residue_matches,
511 alignment_block_len,
512 mapping_quality,
513 optional,
514 };
515
516 Ok(Some(record))
517 }
518}
519
520pub struct RecordsIter<'r, R: 'r> {
522 rdr: &'r mut Reader<R>,
524}
525
526impl<'r, R: io::Read> RecordsIter<'r, R> {
527 fn new(rdr: &'r mut Reader<R>) -> RecordsIter<'r, R> {
529 RecordsIter { rdr }
530 }
531 pub fn reader(&self) -> &Reader<R> {
533 self.rdr
534 }
535
536 pub fn reader_mut(&mut self) -> &mut Reader<R> {
538 self.rdr
539 }
540}
541
542impl<'r, R: io::Read> Iterator for RecordsIter<'r, R> {
543 type Item = Result<PafRecord>;
544
545 fn next(&mut self) -> Option<Result<PafRecord>> {
546 match self.rdr.read_record() {
547 Ok(Some(r)) => {
548 self.rdr.line += 1;
549 Some(Ok(r))
550 }
551 Ok(None) => None,
552 Err(e) => Some(Err(e)),
553 }
554 }
555}
556
557pub struct RecordsIntoIter<R> {
559 rdr: Reader<R>,
561}
562
563impl<R: io::Read> RecordsIntoIter<R> {
564 fn new(rdr: Reader<R>) -> RecordsIntoIter<R> {
566 RecordsIntoIter { rdr }
567 }
568 pub fn reader(&self) -> &Reader<R> {
570 &self.rdr
571 }
572
573 pub fn reader_mut(&mut self) -> &mut Reader<R> {
575 &mut self.rdr
576 }
577
578 pub fn into_reader(self) -> Reader<R> {
580 self.rdr
581 }
582}
583
584impl<R: io::Read> Iterator for RecordsIntoIter<R> {
585 type Item = Result<PafRecord>;
586
587 fn next(&mut self) -> Option<Result<PafRecord>> {
588 match self.rdr.read_record() {
589 Ok(Some(r)) => {
590 self.rdr.line += 1;
591 Some(Ok(r))
592 }
593 Ok(None) => None,
594 Err(e) => Some(Err(e)),
595 }
596 }
597}
598
599#[cfg(test)]
600mod tests {
601 use super::Reader;
602
603 const PAF_RECORD_1: &[u8] = b"NC_041798.1 41841605 28850796 29394458 + SUPER_10 44636193 31974877 32470190 495111 515145 60 NM:i:48730 ms:i:488389 AS:i:439775 nn:i:28696 tp:A:P cm:i:46495 s1:i:466570 s2:i:10896 de:f:0.0003 zd:i:3 rl:i:3568165 cg:Z:770M1D945M1D389M1I9141M1I356M1D196M1I30268M2D789M3I992M2D1819M1D7M1D7M1I10M6D2922M1D17899M2D1010M4D12324M1I1376M1D5549M6D1839M1I2206M1D770M1D2287M1D16103M1D3238M1D2014M1D140M5I14M1D8496M2I2151M1I335M1D14424M1D1093M1I567M1D1835M2D1995M1D5257M1D639M1I699M1I133M1I52M1I99M2I26M1I195M1I1543M1I240M1I176M1I412M2D159M1I261M1D1158M1I933M2D12836M1D993M1D12263M2D4975M2I16452M3I396M1I3924M2D929M3I3015M1D225M1D4225M1D717M2D752M1D2051M1D5110M1D15073M1D1053M2D4369M1D619M3I13564M2I4386M1D1431M2D617M1I612M2I3445M2I252M1D220M1D237M1I903M1I145M1I53M1I197M1I1280M1D4201M1D1736M1D1289M1I3344M2D5456M1D488M1I1655M2D1830M1D796M1I19341M2D1165M1D1926M1D6041M1D2170M1D3917M1D926M1D759M1D400M2I8802M1I836M1I381M48451I166M1I4896M2D1522M49D2729M1D947M2D927M6D911M2D800M2D3040M1D13213M1D8999M3D847M1D220M1I673M1D165M1I901M1I2887M1I105M2I597M1I1201M1I53M2I494M1I23M1D99M1I146M1D29906M1D5661M1I27598M1D520M1I166M2D11600M1D388M1D844M1D4583M1D8390M1D5789M2D3773M1D4494M1D448M1D846M3D531M";
604
605 #[test]
606 fn test_read_record() {
607 let mut parser = Reader::from_reader(&PAF_RECORD_1[..]);
608 let record = parser.read_record().unwrap().unwrap();
609
610 assert_eq!(record.query_name(), "NC_041798.1");
611 assert_eq!(record.query_len(), 41841605);
612 assert_eq!(record.query_start(), 28850796);
613 assert_eq!(record.query_end(), 29394458);
614 assert_eq!(record.strand(), '+');
615 assert_eq!(record.target_name(), "SUPER_10");
616 assert_eq!(record.target_len(), 44636193);
617 assert_eq!(record.target_start(), 31974877);
618 assert_eq!(record.target_end(), 32470190);
619 assert_eq!(record.residue_matches(), 495111);
620 assert_eq!(record.alignment_block_len(), 515145);
621 assert_eq!(record.mapping_quality(), 60);
622
623 let nm = record.nm().unwrap();
624 assert_eq!(nm, &48730);
625 }
626}