use std::{
io::{BufRead, BufReader},
path::PathBuf,
};
use crate::{
io::{
parsers::{tsv::build_tsv_reader, utils::parse_column},
InputStream,
},
ranges::GenomicRangeRecord,
GRangesError, Position,
};
/// Initial capacity, in bytes, reserved for the line buffer that
/// `BedlikeIterator::new` allocates with `String::with_capacity`.
pub const PARSE_CAPACITY: usize = 512;
/// A line-by-line iterator over BED-like, tab-delimited input.
///
/// Its `Iterator` implementation reads one line at a time into a reusable
/// buffer, skips lines that start with `#`, and hands every other line to
/// [`parse_bed_lazy`].
pub struct BedlikeIterator {
/// Buffered reader over the boxed input source.
reader: BufReader<Box<dyn std::io::Read>>,
/// Scratch buffer that is cleared and refilled for each line read.
line_buffer: String,
}
// The reader field is a boxed `dyn Read`, which has no `Debug` impl, so the
// struct is rendered opaquely instead of deriving `Debug`.
impl std::fmt::Debug for BedlikeIterator {
    /// Writes only the type name followed by `{ .. }`; no fields are shown.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("BedlikeIterator");
        builder.finish_non_exhaustive()
    }
}
impl BedlikeIterator {
    /// Creates an iterator over the lines of the input at `filepath`.
    ///
    /// The input is opened through [`InputStream`], and the line buffer is
    /// pre-allocated with [`PARSE_CAPACITY`] bytes.
    ///
    /// # Errors
    ///
    /// Returns whatever error `InputStream::reader` reports when the reader
    /// cannot be created.
    pub fn new(filepath: impl Into<PathBuf>) -> Result<Self, GRangesError> {
        let reader = InputStream::new(filepath).reader()?;
        Ok(Self {
            reader,
            line_buffer: String::with_capacity(PARSE_CAPACITY),
        })
    }
}
impl Iterator for BedlikeIterator {
    type Item = Result<GenomicRangeRecord<Option<String>>, GRangesError>;

    /// Yields the parsed record for the next line that does not start with `#`.
    ///
    /// Returns `None` once the reader reports end of input. A read failure is
    /// yielded as `GRangesError::IOError`; every other line is passed to
    /// [`parse_bed_lazy`] with its trailing whitespace trimmed.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // The buffer is reused, and `read_line` appends, so reset it first.
            self.line_buffer.clear();

            let bytes_read = match self.reader.read_line(&mut self.line_buffer) {
                Ok(count) => count,
                Err(e) => return Some(Err(GRangesError::IOError(e))),
            };

            // Zero bytes read means end of input.
            if bytes_read == 0 {
                return None;
            }

            // Lines starting with `#` are skipped; keep reading.
            if self.line_buffer.starts_with('#') {
                continue;
            }

            return Some(parse_bed_lazy(self.line_buffer.trim_end()));
        }
    }
}
/// Parses one tab-delimited line into a [`GenomicRangeRecord`] whose data is
/// the unparsed remainder of the line.
///
/// The line is split on tabs into at most four fields. The first three are
/// parsed with `parse_column` as the sequence name, start and end. Anything
/// after the third tab is stored verbatim in `data`, or `None` when there is
/// no fourth field.
///
/// # Errors
///
/// Returns `GRangesError::Bed3TooFewColumns` with the number of fields found
/// and a copy of the line when fewer than three fields are present, and
/// propagates any error from `parse_column`.
pub fn parse_bed_lazy(line: &str) -> Result<GenomicRangeRecord<Option<String>>, GRangesError> {
    let mut fields = line.splitn(4, '\t');

    let (seqname_field, start_field, end_field) =
        match (fields.next(), fields.next(), fields.next()) {
            (Some(name), Some(first), Some(last)) => (name, first, last),
            _ => {
                // Re-split only on the error path to report how many fields exist.
                return Err(GRangesError::Bed3TooFewColumns(
                    line.splitn(4, '\t').count(),
                    line.to_string(),
                ));
            }
        };

    let seqname = parse_column(seqname_field, line)?;
    let start: Position = parse_column(start_field, line)?;
    let end: Position = parse_column(end_field, line)?;

    // Whatever is left after the third tab is kept as a single string.
    let data = fields.next().map(str::to_string);

    Ok(GenomicRangeRecord {
        seqname,
        start,
        end,
        data,
    })
}
/// Checks whether the first record of the file at `filepath` looks BED-like.
///
/// The file is opened with `build_tsv_reader` and only its first record is
/// inspected. The result is `true` when that record has at least three fields
/// and the second and third fields, after trimming, parse as `usize`. A
/// reader that yields no records gives `false`.
///
/// # Errors
///
/// Propagates errors from `build_tsv_reader` and from reading the first
/// record.
pub fn valid_bedlike(filepath: impl Into<PathBuf>) -> Result<bool, GRangesError> {
    let mut reader = build_tsv_reader(filepath.into())?;

    let first = match reader.records().next() {
        Some(result) => result?,
        None => return Ok(false),
    };

    if first.len() < 3 {
        return Ok(false);
    }

    // A coordinate column is valid when its trimmed text parses as `usize`.
    let is_coordinate = |idx: usize| {
        first
            .get(idx)
            .map_or(false, |field| field.trim().parse::<usize>().is_ok())
    };

    Ok(is_coordinate(1) && is_coordinate(2))
}
#[cfg(test)]
mod tests {
    use super::valid_bedlike;

    /// Runs `valid_bedlike` over the fixture files and checks each verdict.
    #[test]
    fn test_valid_bedlike() {
        let cases = [
            ("tests_data/example.bed", true),
            ("tests_data/invalid_format.bed", false),
            ("tests_data/example_bedlike.tsv", true),
        ];

        for &(path, expected) in cases.iter() {
            assert_eq!(
                valid_bedlike(path).unwrap(),
                expected,
                "unexpected verdict for {}",
                path
            );
        }
    }
}