1use crate::error::AlignError;
11use crate::hit::Hit;
12use std::io::BufRead;
13
14pub fn parse_tsv<R: BufRead>(
15 rdr: R,
16 coverage_is_fraction: bool,
17) -> Result<Vec<Hit>, AlignError> {
18 let mut out = Vec::new();
19 for (i, line) in rdr.lines().enumerate() {
20 let line = line.map_err(|e| AlignError::TsvParse {
21 line: (i + 1) as u64,
22 msg: format!("read error: {e}"),
23 })?;
24 let l = line.trim_end_matches('\r');
25 if l.is_empty() || l.starts_with('#') {
26 continue;
27 }
28 let hit = parse_hit_line(l, coverage_is_fraction, (i + 1) as u64)?;
29 out.push(hit);
30 }
31 Ok(out)
32}
33
34fn parse_hit_line(
35 line: &str,
36 coverage_is_fraction: bool,
37 line_no: u64,
38) -> Result<Hit, AlignError> {
39 let cols: Vec<&str> = line.splitn(8, '\t').collect();
42 if cols.len() < 8 {
43 return Err(AlignError::TsvParse {
44 line: line_no,
45 msg: format!("expected 8 tab-separated fields, got {}", cols.len()),
46 });
47 }
48 let parse_f32 = |s: &str, field: &str| -> Result<f32, AlignError> {
49 s.trim().parse().map_err(|_| AlignError::TsvParse {
50 line: line_no,
51 msg: format!("{field} `{s}` is not a float"),
52 })
53 };
54 let parse_f64 = |s: &str, field: &str| -> Result<f64, AlignError> {
55 s.trim().parse().map_err(|_| AlignError::TsvParse {
56 line: line_no,
57 msg: format!("{field} `{s}` is not a float"),
58 })
59 };
60 let parse_i = |s: &str, field: &str| -> Result<i32, AlignError> {
61 s.trim().parse().map_err(|_| AlignError::TsvParse {
62 line: line_no,
63 msg: format!("{field} `{s}` is not an integer"),
64 })
65 };
66
67 let mut qcov = parse_f32(cols[4], "qcov")?;
68 if coverage_is_fraction {
69 qcov *= 100.0;
70 }
71 Ok(Hit {
72 qseqid: cols[0].to_string(),
73 pident: parse_f32(cols[1], "pident")?,
74 evalue: parse_f64(cols[2], "evalue")?,
75 bitscore: parse_f32(cols[3], "bitscore")?,
76 qcov,
77 stitle: cols[5].to_string(),
78 sstart: parse_i(cols[6], "sstart")?,
79 send: parse_i(cols[7], "send")?,
80 })
81}
82
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Runs `parse_tsv` over an in-memory string.
    fn run(input: &str, coverage_is_fraction: bool) -> Result<Vec<Hit>, AlignError> {
        parse_tsv(Cursor::new(input), coverage_is_fraction)
    }

    /// A full eight-column row populates every field of the hit.
    #[test]
    fn parses_blast_row() {
        let hits = run(
            "q1\t95.5\t1e-100\t312.5\t90\ttarget1 some desc\t1\t200\n",
            false,
        )
        .unwrap();
        assert_eq!(hits.len(), 1);

        let hit = &hits[0];
        assert_eq!(hit.qseqid, "q1");
        assert_eq!(hit.pident, 95.5);
        assert_eq!(hit.evalue, 1e-100);
        assert_eq!(hit.bitscore, 312.5);
        assert_eq!(hit.qcov, 90.0);
        assert_eq!(hit.stitle, "target1 some desc");
        assert_eq!(hit.sstart, 1);
        assert_eq!(hit.send, 200);
    }

    /// With `coverage_is_fraction` set, a coverage of 0.9 is stored as 90.
    #[test]
    fn rescales_mmseqs_fraction_coverage() {
        let hits = run("q1\t95.5\t1e-100\t312.5\t0.9\ttarget1\t1\t200\n", true).unwrap();
        assert_eq!(hits[0].qcov, 90.0_f32);
    }

    /// Empty lines and `#` lines produce no hits.
    #[test]
    fn skips_blank_and_comment() {
        let hits = run(
            "\n#header\nq1\t95.5\t1e-100\t312.5\t90\tt1\t1\t200\n\n",
            false,
        )
        .unwrap();
        assert_eq!(hits.len(), 1);
    }

    /// A malformed second line is reported as line 2.
    #[test]
    fn error_reports_line_number() {
        let outcome = run(
            "q1\t95.5\t1e-100\t312.5\t90\tt1\t1\t200\nBAD_LINE\n",
            false,
        );
        match outcome.unwrap_err() {
            AlignError::TsvParse { line, .. } => assert_eq!(line, 2),
            other => panic!("unexpected: {other}"),
        }
    }

    /// Spaces inside the title column are kept as they are.
    #[test]
    fn stitle_with_internal_spaces_preserved() {
        let hits = run("q1\t95.5\t1e-100\t312.5\t90\ta b c d\t1\t200\n", false).unwrap();
        assert_eq!(hits[0].stitle, "a b c d");
    }
}