use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;
use std::str::FromStr;
use crate::fasta::FastaReader;
use crate::models::CoordinateVector;
use crate::models::{Sequence, Transcript, TranscriptWrite};
use crate::utils::errors::{FastaError, ReadWriteError};
/// Writes the nucleotide sequences of transcripts to an output stream.
///
/// `W` is the destination of the output, `R` is the source that the
/// [`FastaReader`] reads the reference sequence from.
pub struct Writer<W, R>
where
    W: std::io::Write,
    R: std::io::Read + std::io::Seek,
{
    /// Buffered handle to the output destination.
    inner: BufWriter<W>,
    /// Selects which coordinates of a transcript make up the written sequence.
    seq_builder: SequenceBuilder,
    /// Source of the reference sequence; writing fails with an error while this is `None`.
    fasta_reader: Option<FastaReader<R>>,
    /// Number of sequence bytes written per output line.
    line_length: usize,
    /// Produces the text that follows the `>` of every header line.
    header_template: fn(&Transcript) -> String,
}
impl<R: std::io::Read + std::io::Seek> Writer<File, R> {
    /// Creates a `Writer` that writes into the file at `path`.
    ///
    /// The file is opened with [`File::create`], i.e. it is created if it
    /// does not exist and truncated if it does.
    ///
    /// Any `AsRef<Path>` is accepted, including `Path` and `PathBuf`, which
    /// do not implement `Display`.
    ///
    /// # Errors
    ///
    /// Returns a [`ReadWriteError`] naming the path and the underlying I/O
    /// error if the file cannot be opened for writing.
    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, ReadWriteError> {
        let path = path.as_ref();
        File::create(path).map(Self::new).map_err(|err| {
            ReadWriteError::new(format!(
                "unable to open file {} for writing: {}",
                path.display(),
                err
            ))
        })
    }
}
impl<W: std::io::Write, R: std::io::Read + std::io::Seek> Writer<W, R> {
    /// Creates a `Writer` around `writer`, buffered by a [`BufWriter`] with
    /// its default capacity.
    ///
    /// The new instance uses the `cds` sequence format, has no FASTA reader,
    /// a line length of 50 and a header of the form `<name> <gene>`.
    pub fn new(writer: W) -> Self {
        Self::from_buf_writer(BufWriter::new(writer))
    }

    /// Same as [`Writer::new`], but the internal buffer holds `capacity` bytes.
    pub fn with_capacity(capacity: usize, writer: W) -> Self {
        Self::from_buf_writer(BufWriter::with_capacity(capacity, writer))
    }

    /// Builds a `Writer` with the default settings around an existing buffer.
    fn from_buf_writer(writer: BufWriter<W>) -> Self {
        Writer {
            inner: writer,
            seq_builder: SequenceBuilder::Cds,
            fasta_reader: None,
            line_length: 50,
            header_template: |tx| format!("{} {}", tx.name(), tx.gene()),
        }
    }

    /// Sets the reader that supplies the reference sequence.
    ///
    /// A reader must be set before any transcript can be written.
    pub fn fasta_reader(&mut self, r: FastaReader<R>) {
        self.fasta_reader = Some(r);
    }

    /// Selects which part of a transcript is written: `"cds"`, `"exons"` or
    /// `"transcript"`.
    ///
    /// # Panics
    ///
    /// Panics if `b` is not one of the three names above.
    // NOTE(review): a panic on caller-supplied input is unfortunate, but the
    // `()` return type leaves no way to report the error without breaking callers.
    pub fn fasta_format(&mut self, b: &str) {
        self.seq_builder = SequenceBuilder::from_str(b).unwrap();
    }

    /// Sets the number of sequence bytes written per line.
    ///
    /// `l` must be greater than 0; a sequence cannot be split into lines of
    /// length zero.
    pub fn line_length(&mut self, l: usize) {
        self.line_length = l;
    }

    /// Replaces the function that produces the text following `>` in the
    /// header line of every record.
    pub fn header_template(&mut self, func: fn(&Transcript) -> String) {
        self.header_template = func;
    }

    /// Flushes all buffered output to the underlying writer.
    ///
    /// # Errors
    ///
    /// Returns a [`ReadWriteError`] wrapping the I/O error if flushing fails.
    pub fn flush(&mut self) -> Result<(), ReadWriteError> {
        self.inner.flush().map_err(ReadWriteError::new)
    }

    /// Consumes the `Writer` and returns the underlying writer.
    ///
    /// The buffer is written out before the underlying writer is returned.
    ///
    /// # Errors
    ///
    /// Returns a [`ReadWriteError`] if the buffered data cannot be written.
    pub fn into_inner(self) -> Result<W, ReadWriteError> {
        self.inner.into_inner().map_err(ReadWriteError::new)
    }

    /// Gives mutable access to the internal [`BufWriter`].
    pub fn inner_mut(&mut self) -> &mut BufWriter<W> {
        &mut self.inner
    }

    /// Writes one tab-separated line per feature segment of `transcript`.
    ///
    /// Coding transcripts are split into `5UTR`, `CDS` and `3UTR` sections,
    /// all other transcripts are written as `ncExon`. Every line has the
    /// columns gene, name, chrom, start - 1, end, strand, feature label and
    /// sequence, and is terminated by `\n`.
    ///
    /// # Errors
    ///
    /// Returns an error if no FASTA reader was set, if a sequence cannot be
    /// read or if writing to the output fails.
    pub fn write_features(&mut self, transcript: &Transcript) -> Result<(), std::io::Error> {
        let fasta_reader = match self.fasta_reader.as_mut() {
            Some(reader) => reader,
            None => {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::Other,
                    "no fasta reader specified",
                ))
            }
        };

        let forward = transcript.forward();

        // Forward transcripts are written 5'UTR, CDS, 3'UTR; all others in the
        // opposite order (presumably ascending genomic position - confirm).
        let features: Vec<(&str, CoordinateVector)> = if transcript.is_coding() {
            let utr5 = ("5UTR", transcript.utr5_coordinates());
            let cds = ("CDS", transcript.cds_coordinates());
            let utr3 = ("3UTR", transcript.utr3_coordinates());
            if forward {
                vec![utr5, cds, utr3]
            } else {
                vec![utr3, cds, utr5]
            }
        } else {
            vec![("ncExon", transcript.utr_coordinates())]
        };

        // These columns are identical for every line of the transcript, so
        // they are rendered once instead of once per segment.
        let leading_columns = format!(
            "{}\t{}\t{}",
            transcript.gene(),
            transcript.name(),
            transcript.chrom()
        );
        let strand = transcript.strand().to_string();

        // One buffer is reused for all lines; each line is sent to the
        // output with a single `write_all`.
        let mut row = String::with_capacity(10000);
        for (label, coordinates) in features {
            for (chrom, start, end) in coordinates {
                let mut sequence = fasta_reader.read_sequence(chrom, start.into(), end.into())?;
                if !forward {
                    sequence.reverse_complement();
                }

                row.clear();
                // The start column is written as `start - 1`.
                row.push_str(&format!(
                    "{}\t{}\t{}\t{}\t{}\t",
                    leading_columns,
                    start - 1,
                    end,
                    strand,
                    label
                ));
                sequence.write_into_string(&mut row);
                row.push('\n');
                self.inner.write_all(row.as_bytes())?;
            }
        }
        Ok(())
    }
}
impl<W: std::io::Write, R: std::io::Read + std::io::Seek> TranscriptWrite for Writer<W, R> {
    /// Writes `transcript` like [`Self::write_single_transcript`] and
    /// appends a newline.
    fn writeln_single_transcript(&mut self, transcript: &Transcript) -> Result<(), std::io::Error> {
        self.write_single_transcript(transcript)?;
        self.inner.write_all(b"\n")
    }

    /// Writes one record for `transcript`: a `>` header line followed by
    /// the sequence, split into lines of at most `line_length` bytes.
    ///
    /// No newline is written after the last sequence line.
    ///
    /// # Errors
    ///
    /// Returns an error if no FASTA reader was set, if the line length is 0,
    /// if the sequence cannot be built or if writing to the output fails.
    fn write_single_transcript(&mut self, transcript: &Transcript) -> Result<(), std::io::Error> {
        let fasta_reader = match self.fasta_reader.as_mut() {
            Some(reader) => reader,
            None => {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::Other,
                    "no fasta reader specified",
                ))
            }
        };

        // `chunks` panics on a chunk size of 0, so report it as an error instead.
        if self.line_length == 0 {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                "line length must be greater than 0",
            ));
        }

        // Build the sequence before anything is written, so that a failure
        // does not leave a header without sequence in the output.
        let sequence = self.seq_builder.build(transcript, fasta_reader)?.to_bytes();

        write!(self.inner, ">{}", (self.header_template)(transcript))?;
        for line in sequence.chunks(self.line_length) {
            self.inner.write_all(b"\n")?;
            self.inner.write_all(line)?;
        }
        Ok(())
    }
}
/// Selects which coordinates of a transcript are used to build its sequence.
///
/// The variants are parsed from the strings `"cds"`, `"exons"` and
/// `"transcript"`.
enum SequenceBuilder {
/// Use the coordinates returned by `Transcript::cds_coordinates`.
Cds,
/// Use the coordinates returned by `Transcript::exon_coordinates`.
Exons,
/// Use a single segment that spans from `tx_start` to `tx_end`.
Transcript,
}
impl SequenceBuilder {
    /// Reads the sequence of `transcript` from `fasta_reader`.
    ///
    /// The variant decides which coordinates are read: the CDS coordinates,
    /// the exon coordinates, or one segment from `tx_start` to `tx_end`.
    /// The coordinates are handed to `Sequence::from_coordinates` together
    /// with the strand of the transcript.
    ///
    /// # Errors
    ///
    /// Returns the [`FastaError`] produced by `Sequence::from_coordinates`.
    pub fn build<R>(
        &self,
        transcript: &Transcript,
        fasta_reader: &mut FastaReader<R>,
    ) -> Result<Sequence, FastaError>
    where
        R: std::io::Read + std::io::Seek,
    {
        let coordinates = match self {
            Self::Cds => transcript.cds_coordinates(),
            Self::Exons => transcript.exon_coordinates(),
            Self::Transcript => {
                let whole_transcript = (
                    transcript.chrom(),
                    transcript.tx_start(),
                    transcript.tx_end(),
                );
                vec![whole_transcript]
            }
        };
        let strand = transcript.strand();
        Sequence::from_coordinates(&coordinates, &strand, fasta_reader)
    }
}
/// Parses the name of a fasta-format (`"cds"`, `"exons"` or `"transcript"`).
///
/// The comparison is exact; any other input yields an error message that
/// contains the rejected input.
impl FromStr for SequenceBuilder {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let builder = match s {
            "cds" => Self::Cds,
            "exons" => Self::Exons,
            "transcript" => Self::Transcript,
            unknown => return Err(format!("invalid fasta-format {}", unknown)),
        };
        Ok(builder)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::fasta::FastaReader;
    use crate::tests::transcripts::standard_transcript;

    /// Writes the standard test transcript with the given fasta-format into
    /// an in-memory buffer and returns the output, split at every `\n`.
    fn written_lines(fasta_format: &str) -> Vec<String> {
        let transcripts = vec![standard_transcript()];
        let mut writer = Writer::new(Vec::new());
        writer.fasta_reader(FastaReader::from_file("tests/data/small.fasta").unwrap());
        writer.fasta_format(fasta_format);
        writer.write_transcript_vec(&transcripts).unwrap();
        let output = String::from_utf8(writer.into_inner().unwrap()).unwrap();
        output.split('\n').map(String::from).collect()
    }

    #[test]
    fn test_creating_writer() {
        // Every format writes the same header, only the sequence differs.
        let exons = written_lines("exons");
        assert_eq!(exons[0], ">Test-Transcript Test-Gene");
        assert_eq!(exons[1], "CACGGTGGATGCCCACTGAGAGGGG");

        let cds = written_lines("cds");
        assert_eq!(cds[0], ">Test-Transcript Test-Gene");
        assert_eq!(cds[1], "ATGCCCACTGA");

        let transcript = written_lines("transcript");
        assert_eq!(transcript[0], ">Test-Transcript Test-Gene");
        assert_eq!(
            transcript[1],
            "CACGGGGAAATGGATGGACTGCCCAGTAGCCTGAGGACACAGGGG"
        );
    }

    #[test]
    fn test_sequence_building() {
        let transcript = standard_transcript();
        let mut reader = FastaReader::from_file("tests/data/small.fasta").unwrap();
        let sequence = SequenceBuilder::Cds
            .build(&transcript, &mut reader)
            .unwrap();
        assert_eq!(sequence.to_string(), "ATGCCCACTGA");
    }
}