1
2use anyhow::{Context, Result};
3use std::fs::File;
4use std::io::{BufRead, BufReader};
5use std::path::Path;
6
7#[derive(Debug, Clone)]
8pub struct PafRecord {
9
10 pub query_name: String,
11
12 pub query_len: usize,
13
14 pub query_start: usize,
15
16 pub query_end: usize,
17
18 pub strand: char,
19
20 pub target_name: String,
21
22 pub target_len: usize,
23
24 pub target_start: usize,
25
26 pub target_end: usize,
27
28 pub matches: usize,
29
30 pub block_len: usize,
31}
32
33impl PafRecord {
34
35 pub fn parse_line(line: &str) -> Result<Self> {
36 let fields: Vec<&str> = line.split('\t').collect();
37 if fields.len() < 12 {
38 anyhow::bail!("Invalid PAF line: fewer than 12 fields");
39 }
40
41 Ok(Self {
42 query_name: fields[0].to_string(),
43 query_len: fields[1].parse().context("Invalid query length")?,
44 query_start: fields[2].parse().context("Invalid query start")?,
45 query_end: fields[3].parse().context("Invalid query end")?,
46 strand: fields[4].chars().next().unwrap_or('+'),
47 target_name: fields[5].to_string(),
48 target_len: fields[6].parse().context("Invalid target length")?,
49 target_start: fields[7].parse().context("Invalid target start")?,
50 target_end: fields[8].parse().context("Invalid target end")?,
51 matches: fields[9].parse().context("Invalid matches count")?,
52 block_len: fields[10].parse().context("Invalid block length")?,
53 })
54 }
55
56 pub fn calculate_identity(&self) -> f64 {
57 if self.block_len == 0 {
58 return 0.0;
59 }
60 (self.matches as f64 / self.block_len as f64) * 100.0
61 }
62
63 pub fn calculate_coverage(&self) -> f64 {
64 if self.target_len == 0 {
65 return 0.0;
66 }
67 ((self.target_end - self.target_start) as f64 / self.target_len as f64) * 100.0
68 }
69}
70
71pub struct PafReader {
72 reader: BufReader<File>,
73 line_buf: String,
74}
75
76impl PafReader {
77
78 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
79 let file = File::open(path.as_ref())
80 .with_context(|| format!("Failed to open PAF: {}", path.as_ref().display()))?;
81 Ok(Self {
82 reader: BufReader::with_capacity(1024 * 1024, file),
83 line_buf: String::with_capacity(512),
84 })
85 }
86
87 pub fn read_next(&mut self) -> Result<Option<PafRecord>> {
88 self.line_buf.clear();
89 if self.reader.read_line(&mut self.line_buf)? == 0 {
90 return Ok(None);
91 }
92
93 let line = self.line_buf.trim_end();
94 if line.is_empty() {
95 return self.read_next();
96 }
97
98 Ok(Some(PafRecord::parse_line(line)?))
99 }
100}
101
102impl Iterator for PafReader {
103 type Item = Result<PafRecord>;
104
105 fn next(&mut self) -> Option<Self::Item> {
106 match self.read_next() {
107 Ok(Some(record)) => Some(Ok(record)),
108 Ok(None) => None,
109 Err(e) => Some(Err(e)),
110 }
111 }
112}
113
114#[cfg(test)]
115mod tests {
116 use super::*;
117
118 #[test]
119 fn test_parse_paf_line() {
120 let line = "read1\t150\t10\t140\t+\tgene1\t1000\t100\t230\t120\t130\t60";
121 let record = PafRecord::parse_line(line).unwrap();
122
123 assert_eq!(record.query_name, "read1");
124 assert_eq!(record.query_len, 150);
125 assert_eq!(record.query_start, 10);
126 assert_eq!(record.query_end, 140);
127 assert_eq!(record.strand, '+');
128 assert_eq!(record.target_name, "gene1");
129 assert_eq!(record.target_len, 1000);
130 assert_eq!(record.matches, 120);
131 assert_eq!(record.block_len, 130);
132 }
133
134 #[test]
135 fn test_calculate_identity() {
136 let record = PafRecord {
137 query_name: "test".to_string(),
138 query_len: 100,
139 query_start: 0,
140 query_end: 100,
141 strand: '+',
142 target_name: "ref".to_string(),
143 target_len: 100,
144 target_start: 0,
145 target_end: 100,
146 matches: 95,
147 block_len: 100,
148 };
149
150 assert_eq!(record.calculate_identity(), 95.0);
151 }
152
153 #[test]
154 fn test_calculate_coverage() {
155 let record = PafRecord {
156 query_name: "test".to_string(),
157 query_len: 100,
158 query_start: 0,
159 query_end: 100,
160 strand: '+',
161 target_name: "ref".to_string(),
162 target_len: 200,
163 target_start: 0,
164 target_end: 100,
165 matches: 100,
166 block_len: 100,
167 };
168
169 assert_eq!(record.calculate_coverage(), 50.0);
170 }
171
172 #[test]
173 fn test_invalid_paf_line() {
174 let line = "incomplete\tline";
175 assert!(PafRecord::parse_line(line).is_err());
176 }
177}