use crate::error::AlignError;
use crate::hit::Hit;
use std::io::BufRead;
pub fn parse_tsv<R: BufRead>(
rdr: R,
coverage_is_fraction: bool,
) -> Result<Vec<Hit>, AlignError> {
let mut out = Vec::new();
for (i, line) in rdr.lines().enumerate() {
let line = line.map_err(|e| AlignError::TsvParse {
line: (i + 1) as u64,
msg: format!("read error: {e}"),
})?;
let l = line.trim_end_matches('\r');
if l.is_empty() || l.starts_with('#') {
continue;
}
let hit = parse_hit_line(l, coverage_is_fraction, (i + 1) as u64)?;
out.push(hit);
}
Ok(out)
}
fn parse_hit_line(
line: &str,
coverage_is_fraction: bool,
line_no: u64,
) -> Result<Hit, AlignError> {
let cols: Vec<&str> = line.splitn(8, '\t').collect();
if cols.len() < 8 {
return Err(AlignError::TsvParse {
line: line_no,
msg: format!("expected 8 tab-separated fields, got {}", cols.len()),
});
}
let parse_f32 = |s: &str, field: &str| -> Result<f32, AlignError> {
s.trim().parse().map_err(|_| AlignError::TsvParse {
line: line_no,
msg: format!("{field} `{s}` is not a float"),
})
};
let parse_f64 = |s: &str, field: &str| -> Result<f64, AlignError> {
s.trim().parse().map_err(|_| AlignError::TsvParse {
line: line_no,
msg: format!("{field} `{s}` is not a float"),
})
};
let parse_i = |s: &str, field: &str| -> Result<i32, AlignError> {
s.trim().parse().map_err(|_| AlignError::TsvParse {
line: line_no,
msg: format!("{field} `{s}` is not an integer"),
})
};
let mut qcov = parse_f32(cols[4], "qcov")?;
if coverage_is_fraction {
qcov *= 100.0;
}
Ok(Hit {
qseqid: cols[0].to_string(),
pident: parse_f32(cols[1], "pident")?,
evalue: parse_f64(cols[2], "evalue")?,
bitscore: parse_f32(cols[3], "bitscore")?,
qcov,
stitle: cols[5].to_string(),
sstart: parse_i(cols[6], "sstart")?,
send: parse_i(cols[7], "send")?,
})
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Cursor;
#[test]
fn parses_blast_row() {
let data = "q1\t95.5\t1e-100\t312.5\t90\ttarget1 some desc\t1\t200\n";
let hits = parse_tsv(Cursor::new(data), false).unwrap();
assert_eq!(hits.len(), 1);
assert_eq!(hits[0].qseqid, "q1");
assert_eq!(hits[0].pident, 95.5);
assert_eq!(hits[0].evalue, 1e-100);
assert_eq!(hits[0].bitscore, 312.5);
assert_eq!(hits[0].qcov, 90.0);
assert_eq!(hits[0].stitle, "target1 some desc");
assert_eq!(hits[0].sstart, 1);
assert_eq!(hits[0].send, 200);
}
#[test]
fn rescales_mmseqs_fraction_coverage() {
let data = "q1\t95.5\t1e-100\t312.5\t0.9\ttarget1\t1\t200\n";
let hits = parse_tsv(Cursor::new(data), true).unwrap();
assert_eq!(hits[0].qcov, 90.0_f32);
}
#[test]
fn skips_blank_and_comment() {
let data = "\n#header\nq1\t95.5\t1e-100\t312.5\t90\tt1\t1\t200\n\n";
let hits = parse_tsv(Cursor::new(data), false).unwrap();
assert_eq!(hits.len(), 1);
}
#[test]
fn error_reports_line_number() {
let data = "q1\t95.5\t1e-100\t312.5\t90\tt1\t1\t200\nBAD_LINE\n";
let err = parse_tsv(Cursor::new(data), false).unwrap_err();
match err {
AlignError::TsvParse { line, .. } => assert_eq!(line, 2),
other => panic!("unexpected: {other}"),
}
}
#[test]
fn stitle_with_internal_spaces_preserved() {
let data = "q1\t95.5\t1e-100\t312.5\t90\ta b c d\t1\t200\n";
let h = parse_tsv(Cursor::new(data), false).unwrap();
assert_eq!(h[0].stitle, "a b c d");
}
}