use std::{
fs::File,
io::{self, BufRead, Read},
path::Path,
};
use crate::{Error, ErrorKind, Record, Result};
/// Selects which input layout the line parser expects.
///
/// NOTE(review): the variant names presumably mirror the `-d` and `-ngs`
/// command-line options of Tandem Repeats Finder — confirm against the tool's
/// documentation.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Flag {
    /// Input with banner lines, blank lines and `Sequence: <name>` headers;
    /// every data line holds exactly 15 space-separated columns.
    D,
    /// Input whose sequence headers start with `@`; every data line carries
    /// two extra trailing columns that are discarded before parsing.
    Ngs,
}
/// Record reader over any byte source `R`.
///
/// Records are pulled through [`Reader::records`] or [`Reader::into_records`].
pub struct Reader<R> {
// Buffered wrapper around the underlying source; lines are read from it one at a time.
rdr: io::BufReader<R>,
// Line counter that is embedded in the message of read errors.
line: u64,
// Most recently seen sequence identifier; the iterators copy it into every record they yield.
id: String,
// Input layout handed to the line parser.
flag: Flag,
}
impl Reader<File> {
    /// Opens the file at `path` and builds a reader that parses it according
    /// to `flag`.
    ///
    /// # Errors
    ///
    /// Returns the converted I/O error when the file cannot be opened.
    pub fn from_path<P: AsRef<Path>>(path: P, flag: Flag) -> Result<Reader<File>> {
        let file = File::open(path)?;
        Ok(Reader::new(file, flag))
    }

    /// Builds a reader on top of an arbitrary [`io::Read`] source.
    ///
    /// Although declared in the `Reader<File>` impl, the returned reader is
    /// generic over the source type `R` that is passed in.
    pub fn from_reader<R: io::Read>(rdr: R, flag: Flag) -> Reader<R> {
        Reader::<R>::new(rdr, flag)
    }
}
impl<R: io::Read> Reader<R> {
pub fn new(rdr: R, flag: Flag) -> Reader<R> {
Reader {
rdr: io::BufReader::new(rdr),
line: 0,
id: String::new(),
flag,
}
}
pub fn records(&mut self) -> RecordsIter<R> {
RecordsIter::new(self)
}
pub fn into_records(self) -> RecordsIntoIter<R> {
RecordsIntoIter::new(self)
}
fn read_record(&mut self) -> Result<Option<Record>> {
let mut record = Record::default();
let reader = self.rdr.by_ref();
let mut temp_buf = String::new();
loop {
self.line += 1;
temp_buf.clear();
let bytes = reader.read_line(&mut temp_buf)?;
if bytes == 0 {
if record == Record::default() {
return Ok(None);
} else {
return Ok(Some(record));
}
}
let parsed = parse_input_line(temp_buf.clone(), &mut record, self.flag);
match parsed {
Ok(e) => match e {
Some(sequence_id) => {
if let Some(id) = sequence_id {
self.id = id;
continue;
}
break;
}
None => continue,
},
Err(e) => {
return Err(Error::new(ErrorKind::ReadRecord(format!(
"at line {}, {}",
self.line, e
))))
}
}
}
Ok(Some(record))
}
}
/// Classifies one input line and, for data lines, fills `record` from it.
///
/// # Returns
///
/// * `Ok(None)` — the line carries no data (blank or banner line, `Flag::D`
///   only) and should be skipped.
/// * `Ok(Some(Some(name)))` — the line is a sequence header; `name` is the
///   trimmed sequence identifier.
/// * `Ok(Some(None))` — the line was a data line and `record` now holds its
///   15 parsed columns.
///
/// # Errors
///
/// Returns a parser error when the line does not consist of exactly 15
/// space-separated columns (after dropping the two trailing columns for
/// `Flag::Ngs`), and propagates the conversion error when a numeric column
/// fails to parse. `record` may be partially updated in the latter case.
fn parse_input_line(
    input: String,
    record: &mut Record,
    flag: Flag,
) -> Result<Option<Option<String>>> {
    // Banner lines that carry no data and are skipped for `Flag::D`.
    const SKIPPED_PREFIXES: [&str; 6] = [
        "Tandem Repeats",
        "Gary Benson",
        "Program",
        "Boston",
        "Version",
        "Parameters",
    ];

    match flag {
        Flag::D => {
            if input.trim().is_empty()
                || SKIPPED_PREFIXES.iter().any(|p| input.starts_with(*p))
            {
                return Ok(None);
            }
            if input.starts_with("Sequence") {
                // Strip only the leading marker so a name that itself contains
                // "Sequence: " is preserved unchanged.
                let name = input
                    .strip_prefix("Sequence: ")
                    .unwrap_or(&input)
                    .trim()
                    .to_string();
                return Ok(Some(Some(name)));
            }
        }
        Flag::Ngs => {
            // Strip only the leading '@'; any further '@' belongs to the name.
            if let Some(name) = input.strip_prefix('@') {
                return Ok(Some(Some(name.trim().to_string())));
            }
        }
    }

    let mut line_elements: Vec<&str> = input.split(' ').collect();
    match flag {
        Flag::D => {}
        Flag::Ngs => {
            // Drop the two trailing columns. `saturating_sub` keeps a short
            // line from underflowing; a wrong column count is then reported
            // through the error branch below instead of panicking.
            line_elements.truncate(line_elements.len().saturating_sub(2));
        }
    }

    if let [start, end, period, copy_number, consensus_pattern_size, perc_matches, perc_indels, alignment_score, perc_a, perc_c, perc_g, perc_t, entropy, consensus_pattern, repeat_seq] =
        &line_elements[..]
    {
        record.start = start.parse::<usize>()?;
        record.end = end.parse::<usize>()?;
        record.period = period.parse::<u16>()?;
        record.copy_number = copy_number.parse::<f32>()?;
        record.consensus_pattern_size = consensus_pattern_size.parse::<u16>()?;
        record.perc_matches = perc_matches.parse::<u8>()?;
        record.perc_indels = perc_indels.parse::<u8>()?;
        record.alignment_score = alignment_score.parse::<u32>()?;
        record.perc_a = perc_a.parse::<u8>()?;
        record.perc_c = perc_c.parse::<u8>()?;
        record.perc_g = perc_g.parse::<u8>()?;
        record.perc_t = perc_t.parse::<u8>()?;
        record.entropy = entropy.parse::<f32>()?;
        record.consensus_pattern = consensus_pattern.to_string();
        // For `Flag::D` the last column still carries the line terminator.
        record.repeat_seq = repeat_seq.trim().to_string();
        Ok(Some(None))
    } else {
        Err(Error::new(ErrorKind::Parser(
            "could not split into 15 elements".into(),
        )))
    }
}
/// Iterator over records that mutably borrows a [`Reader`] for the lifetime `'r`.
///
/// Created by [`Reader::records`].
pub struct RecordsIter<'r, R: 'r> {
// The borrowed reader that records are pulled from.
rdr: &'r mut Reader<R>,
}
impl<'r, R: io::Read> RecordsIter<'r, R> {
fn new(rdr: &'r mut Reader<R>) -> RecordsIter<'r, R> {
RecordsIter { rdr }
}
pub fn reader(&self) -> &Reader<R> {
self.rdr
}
pub fn reader_mut(&mut self) -> &mut Reader<R> {
self.rdr
}
}
impl<'r, R: io::Read> Iterator for RecordsIter<'r, R> {
    type Item = Result<Record>;

    /// Yields the next record tagged with the reader's current sequence
    /// identifier, `None` at end of input, or the read error.
    fn next(&mut self) -> Option<Result<Record>> {
        match self.rdr.read_record() {
            Ok(Some(mut record)) => {
                // `read_record` already counts every line it consumes, so the
                // line counter must not be bumped again here; doing so made
                // the line numbers in later error messages drift by one per
                // record.
                record.seq_id = self.rdr.id.clone();
                Some(Ok(record))
            }
            Ok(None) => None,
            Err(e) => Some(Err(e)),
        }
    }
}
/// Iterator over records that owns its [`Reader`].
///
/// Created by [`Reader::into_records`].
pub struct RecordsIntoIter<R> {
// The owned reader that records are pulled from.
rdr: Reader<R>,
}
impl<R: io::Read> RecordsIntoIter<R> {
fn new(rdr: Reader<R>) -> RecordsIntoIter<R> {
RecordsIntoIter { rdr }
}
pub fn reader(&self) -> &Reader<R> {
&self.rdr
}
pub fn reader_mut(&mut self) -> &mut Reader<R> {
&mut self.rdr
}
pub fn into_reader(self) -> Reader<R> {
self.rdr
}
}
impl<R: io::Read> Iterator for RecordsIntoIter<R> {
    type Item = Result<Record>;

    /// Yields the next record tagged with the reader's current sequence
    /// identifier, `None` at end of input, or the read error.
    fn next(&mut self) -> Option<Result<Record>> {
        match self.rdr.read_record() {
            Ok(Some(mut record)) => {
                // `read_record` already counts every line it consumes, so the
                // line counter must not be bumped again here; doing so made
                // the line numbers in later error messages drift by one per
                // record.
                record.seq_id = self.rdr.id.clone();
                Some(Ok(record))
            }
            Ok(None) => None,
            Err(e) => Some(Err(e)),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::{Flag, Reader};

    /// One well-formed 15-column data line shared by every test.
    const RECORD_LINE: &str =
        "51 71 11 1.9 11 100 0 84 19 4 38 38 1.73 TTAGGTTAGGC TTAGGTTAGGCTTAGGTTAGG";

    /// A bare data line parses into a record under `Flag::D`.
    #[test]
    fn test_read_record() {
        let mut reader = Reader::from_reader(RECORD_LINE.as_bytes(), Flag::D);
        let record = reader.records().next().unwrap().unwrap();
        assert_eq!(record.start, 51);
        assert_eq!(record.end, 71);
        assert_eq!(record.consensus_pattern, "TTAGGTTAGGC");
    }

    /// A `Sequence: ` header followed by a blank line tags the record with
    /// the sequence name.
    #[test]
    fn test_read_record_and_seq_id() {
        let text = format!("Sequence: first\n\n{}", RECORD_LINE);
        let mut reader = Reader::from_reader(text.as_bytes(), Flag::D);
        let record = reader.records().next().unwrap().unwrap();
        assert_eq!(record.seq_id, "first");
        assert_eq!(record.start, 51);
        assert_eq!(record.end, 71);
        assert_eq!(record.consensus_pattern, "TTAGGTTAGGC");
    }

    /// Under `Flag::Ngs` the header starts with `@` and the two trailing
    /// columns of the data line are ignored.
    #[test]
    fn test_flag_ngs() {
        let text = format!("@first\n{} . CW", RECORD_LINE);
        let mut reader = Reader::from_reader(text.as_bytes(), Flag::Ngs);
        let record = reader.records().next().unwrap().unwrap();
        assert_eq!(record.seq_id, "first");
        assert_eq!(record.start, 51);
        assert_eq!(record.end, 71);
        assert_eq!(record.consensus_pattern, "TTAGGTTAGGC");
    }
}