use crate::core::{GenomicReader, GenomicRecordIterator};
use crate::error::Result;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
/// One FASTA entry: the parsed header fields plus the sequence text.
///
/// Derives `PartialEq`/`Eq`/`Hash` so records can be compared directly in
/// assertions and stored in hashed collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FastaRecord {
    /// Identifier: the leading token of the header line (without the `>`).
    pub id: String,
    /// Remaining header text after the identifier, when there is any.
    pub description: Option<String>,
    /// Sequence text of the record, with its lines joined together.
    pub sequence: String,
}
impl FastaRecord {
    /// Rebuilds the header text from the stored fields.
    ///
    /// Returns `"<id> <description>"` when a description is present and just
    /// the id otherwise. The leading `>` marker is not included.
    pub fn header(&self) -> String {
        self.description.as_deref().map_or_else(
            || self.id.clone(),
            |text| [self.id.as_str(), text].join(" "),
        )
    }

    /// Length of the stored sequence in bytes.
    pub fn len(&self) -> usize {
        self.sequence.len()
    }

    /// `true` when the record holds no sequence data.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
/// Streaming FASTA parser over any buffered reader.
///
/// Derives `Debug` (available whenever `R` itself is `Debug`) so the reader
/// can be inspected in logs and in types that embed it.
#[derive(Debug)]
pub struct FastaReader<R: BufRead> {
    /// Underlying buffered input the lines are pulled from.
    reader: R,
    /// One-line lookahead: a line already read from `reader` but not yet
    /// consumed by the parser. `None` when nothing is buffered.
    current_line: Option<String>,
}
impl FastaReader<BufReader<File>> {
    /// Opens the file at `path` and wraps it in a buffered FASTA reader.
    ///
    /// # Errors
    ///
    /// Returns the failure from `File::open`, converted by `?` into this
    /// function's error type.
    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
        let buffered = BufReader::new(File::open(path)?);
        Ok(Self::new(buffered))
    }
}
impl<R: BufRead> FastaReader<R> {
    /// Creates a reader over `reader` with an empty lookahead buffer.
    pub fn new(reader: R) -> Self {
        let current_line = None;
        Self {
            reader,
            current_line,
        }
    }

    /// Returns the next unconsumed line without consuming it.
    ///
    /// If nothing is buffered, one line is read from the underlying reader
    /// (line terminator included) and kept until `consume_line` takes it.
    /// Yields `Ok(None)` once the underlying reader reports zero bytes read.
    fn peek_line(&mut self) -> Result<Option<&str>> {
        let needs_fill = self.current_line.is_none();
        if needs_fill {
            let mut buffer = String::new();
            match self.reader.read_line(&mut buffer)? {
                // Zero bytes read means end of input; leave the lookahead empty.
                0 => return Ok(None),
                _ => self.current_line = Some(buffer),
            }
        }
        Ok(self.current_line.as_deref())
    }

    /// Removes and returns the buffered line, leaving the lookahead empty.
    ///
    /// Returns `None` when no line is currently buffered.
    fn consume_line(&mut self) -> Option<String> {
        std::mem::take(&mut self.current_line)
    }
}
impl<R: BufRead> GenomicRecordIterator for FastaReader<R> {
    type Record = FastaRecord;

    /// Raw byte access is not provided by this reader: always returns `Ok(None)`.
    // NOTE(review): looks like a placeholder — confirm what the trait expects here.
    fn next_raw(&mut self) -> Result<Option<Vec<u8>>> {
        Ok(None)
    }

    /// Parses the next FASTA record, or returns `Ok(None)` at end of input.
    ///
    /// Lines that appear before a `>` header line are skipped. The header is
    /// split at the first whitespace character (space, tab, ...) into an id
    /// and an optional description; whitespace between the two is dropped.
    /// Every following line up to the next header (or end of input) is
    /// trimmed and appended to the sequence.
    ///
    /// # Errors
    ///
    /// Propagates any error from reading the underlying input.
    fn next_record(&mut self) -> Result<Option<Self::Record>> {
        /// Splits a raw header line into `(id, description)`.
        fn split_header(line: &str) -> (String, Option<String>) {
            let header = line.trim_start_matches('>').trim();
            match header.split_once(char::is_whitespace) {
                // `header` is already trimmed, so `rest` always holds real text
                // once the separator run is removed.
                Some((id, rest)) => (id.to_string(), Some(rest.trim_start().to_string())),
                None => (header.to_string(), None),
            }
        }

        loop {
            // Parse straight from the borrowed lookahead so the line is not cloned.
            let parsed = match self.peek_line()? {
                None => return Ok(None),
                Some(line) if line.starts_with('>') => Some(split_header(line)),
                Some(_) => None,
            };
            // Header or stray line, it has been dealt with: drop it.
            self.consume_line();

            let (id, description) = match parsed {
                Some(parts) => parts,
                // Not a header: keep scanning for the start of a record.
                None => continue,
            };

            let mut sequence = String::new();
            while let Some(line) = self.peek_line()? {
                if line.starts_with('>') {
                    // Leave the next header buffered for the following call.
                    break;
                }
                sequence.push_str(line.trim());
                self.consume_line();
            }

            return Ok(Some(FastaRecord {
                id,
                description,
                sequence,
            }));
        }
    }
}
impl<R: BufRead> GenomicReader for FastaReader<R> {
    /// This reader exposes no metadata, so the metadata type is unit.
    type Metadata = ();

    /// Returns a reference to the unit value.
    fn metadata(&self) -> &Self::Metadata {
        const NO_METADATA: () = ();
        &NO_METADATA
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Two records — one multi-line with a description, one bare — are
    /// returned in order, followed by end of input.
    #[test]
    fn test_fasta_parsing() {
        let input = ">seq1 description here\nACGTACGTACGT\nACGTACGT\n>seq2\nGGGGCCCC\n";
        let mut reader = FastaReader::new(Cursor::new(input));

        // (id, description, sequence) for each record, in file order.
        let expected = [
            ("seq1", Some("description here"), "ACGTACGTACGTACGTACGT"),
            ("seq2", None, "GGGGCCCC"),
        ];

        for &(id, description, sequence) in expected.iter() {
            let record = reader.next_record().unwrap().unwrap();
            assert_eq!(record.id, id);
            assert_eq!(record.description, description.map(|text| text.to_string()));
            assert_eq!(record.sequence, sequence);
        }

        assert!(reader.next_record().unwrap().is_none());
    }
}