Skip to main content

argenus/
paf.rs

1
2use anyhow::{Context, Result};
3use std::fs::File;
4use std::io::{BufRead, BufReader};
5use std::path::Path;
6
7#[derive(Debug, Clone)]
8pub struct PafRecord {
9
10    pub query_name: String,
11
12    pub query_len: usize,
13
14    pub query_start: usize,
15
16    pub query_end: usize,
17
18    pub strand: char,
19
20    pub target_name: String,
21
22    pub target_len: usize,
23
24    pub target_start: usize,
25
26    pub target_end: usize,
27
28    pub matches: usize,
29
30    pub block_len: usize,
31}
32
33impl PafRecord {
34
35    pub fn parse_line(line: &str) -> Result<Self> {
36        let fields: Vec<&str> = line.split('\t').collect();
37        if fields.len() < 12 {
38            anyhow::bail!("Invalid PAF line: fewer than 12 fields");
39        }
40
41        Ok(Self {
42            query_name: fields[0].to_string(),
43            query_len: fields[1].parse().context("Invalid query length")?,
44            query_start: fields[2].parse().context("Invalid query start")?,
45            query_end: fields[3].parse().context("Invalid query end")?,
46            strand: fields[4].chars().next().unwrap_or('+'),
47            target_name: fields[5].to_string(),
48            target_len: fields[6].parse().context("Invalid target length")?,
49            target_start: fields[7].parse().context("Invalid target start")?,
50            target_end: fields[8].parse().context("Invalid target end")?,
51            matches: fields[9].parse().context("Invalid matches count")?,
52            block_len: fields[10].parse().context("Invalid block length")?,
53        })
54    }
55
56    pub fn calculate_identity(&self) -> f64 {
57        if self.block_len == 0 {
58            return 0.0;
59        }
60        (self.matches as f64 / self.block_len as f64) * 100.0
61    }
62
63    pub fn calculate_coverage(&self) -> f64 {
64        if self.target_len == 0 {
65            return 0.0;
66        }
67        ((self.target_end - self.target_start) as f64 / self.target_len as f64) * 100.0
68    }
69}
70
71pub struct PafReader {
72    reader: BufReader<File>,
73    line_buf: String,
74}
75
76impl PafReader {
77
78    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
79        let file = File::open(path.as_ref())
80            .with_context(|| format!("Failed to open PAF: {}", path.as_ref().display()))?;
81        Ok(Self {
82            reader: BufReader::with_capacity(1024 * 1024, file),
83            line_buf: String::with_capacity(512),
84        })
85    }
86
87    pub fn read_next(&mut self) -> Result<Option<PafRecord>> {
88        self.line_buf.clear();
89        if self.reader.read_line(&mut self.line_buf)? == 0 {
90            return Ok(None);
91        }
92
93        let line = self.line_buf.trim_end();
94        if line.is_empty() {
95            return self.read_next();
96        }
97
98        Ok(Some(PafRecord::parse_line(line)?))
99    }
100}
101
102impl Iterator for PafReader {
103    type Item = Result<PafRecord>;
104
105    fn next(&mut self) -> Option<Self::Item> {
106        match self.read_next() {
107            Ok(Some(record)) => Some(Ok(record)),
108            Ok(None) => None,
109            Err(e) => Some(Err(e)),
110        }
111    }
112}
113
114#[cfg(test)]
115mod tests {
116    use super::*;
117
118    #[test]
119    fn test_parse_paf_line() {
120        let line = "read1\t150\t10\t140\t+\tgene1\t1000\t100\t230\t120\t130\t60";
121        let record = PafRecord::parse_line(line).unwrap();
122
123        assert_eq!(record.query_name, "read1");
124        assert_eq!(record.query_len, 150);
125        assert_eq!(record.query_start, 10);
126        assert_eq!(record.query_end, 140);
127        assert_eq!(record.strand, '+');
128        assert_eq!(record.target_name, "gene1");
129        assert_eq!(record.target_len, 1000);
130        assert_eq!(record.matches, 120);
131        assert_eq!(record.block_len, 130);
132    }
133
134    #[test]
135    fn test_calculate_identity() {
136        let record = PafRecord {
137            query_name: "test".to_string(),
138            query_len: 100,
139            query_start: 0,
140            query_end: 100,
141            strand: '+',
142            target_name: "ref".to_string(),
143            target_len: 100,
144            target_start: 0,
145            target_end: 100,
146            matches: 95,
147            block_len: 100,
148        };
149
150        assert_eq!(record.calculate_identity(), 95.0);
151    }
152
153    #[test]
154    fn test_calculate_coverage() {
155        let record = PafRecord {
156            query_name: "test".to_string(),
157            query_len: 100,
158            query_start: 0,
159            query_end: 100,
160            strand: '+',
161            target_name: "ref".to_string(),
162            target_len: 200,
163            target_start: 0,
164            target_end: 100,
165            matches: 100,
166            block_len: 100,
167        };
168
169        assert_eq!(record.calculate_coverage(), 50.0);
170    }
171
172    #[test]
173    fn test_invalid_paf_line() {
174        let line = "incomplete\tline";
175        assert!(PafRecord::parse_line(line).is_err());
176    }
177}