use std::convert::AsRef;
use std::fmt::Write;
use std::fs;
use std::io;
use std::marker::Copy;
use std::ops::Deref;
use std::path::Path;
use anyhow::Context;
use bio_types::annot;
use bio_types::annot::loc::Loc;
use bio_types::strand;
/// A BED reader.
///
/// Wraps a `csv::Reader` over any [`io::Read`] source; see [`Reader::new`] for how the
/// underlying CSV parser is configured.
#[derive(Debug)]
pub struct Reader<R>
where
    R: io::Read,
{
    /// The CSV reader that performs the actual parsing.
    inner: csv::Reader<R>,
}
impl Reader<fs::File> {
pub fn from_file<P: AsRef<Path> + std::fmt::Debug>(path: P) -> anyhow::Result<Self> {
fs::File::open(&path)
.map(Reader::new)
.with_context(|| format!("Failed to read bed from {:#?}", path))
}
}
impl<R: io::Read> Reader<R> {
    /// Creates a BED reader over `reader`.
    ///
    /// The underlying CSV parser is tab-delimited, treats no row as a header, and
    /// skips lines that start with `#`.
    pub fn new(reader: R) -> Self {
        let mut builder = csv::ReaderBuilder::new();
        builder
            .delimiter(b'\t')
            .has_headers(false)
            .comment(Some(b'#'));
        Reader {
            inner: builder.from_reader(reader),
        }
    }

    /// Returns an iterator that deserializes each remaining row into a [`Record`].
    pub fn records(&mut self) -> Records<'_, R> {
        let inner = self.inner.deserialize();
        Records { inner }
    }
}
/// An iterator over the records of a BED source, borrowed from a [`Reader`].
pub struct Records<'a, R>
where
    R: io::Read,
{
    /// The CSV deserializing iterator that yields one [`Record`] per row.
    inner: csv::DeserializeRecordsIter<'a, R, Record>,
}
impl<'a, R: io::Read> Iterator for Records<'a, R> {
    type Item = csv::Result<Record>;

    /// Yields the next deserialized record, or the CSV error produced while reading it.
    fn next(&mut self) -> Option<Self::Item> {
        // Pure delegation: the wrapped CSV iterator does all the work.
        Iterator::next(&mut self.inner)
    }
}
/// A BED writer.
///
/// Wraps a `csv::Writer` over any [`io::Write`] sink; see [`Writer::new`] for how the
/// underlying CSV writer is configured.
#[derive(Debug)]
pub struct Writer<W>
where
    W: io::Write,
{
    /// The CSV writer that performs the actual serialization.
    inner: csv::Writer<W>,
}
impl Writer<fs::File> {
    /// Creates the file at `path` and wraps it in a BED [`Writer`].
    ///
    /// # Errors
    ///
    /// Returns the I/O error from `fs::File::create` if the file cannot be created.
    #[allow(clippy::wrong_self_convention)]
    pub fn to_file<P: AsRef<Path>>(path: P) -> io::Result<Self> {
        let file = fs::File::create(path)?;
        Ok(Writer::new(file))
    }
}
impl<W: io::Write> Writer<W> {
pub fn new(writer: W) -> Self {
Writer {
inner: csv::WriterBuilder::new()
.delimiter(b'\t')
.flexible(true)
.from_writer(writer),
}
}
pub fn write(&mut self, record: &Record) -> csv::Result<()> {
if record.aux.is_empty() {
self.inner
.serialize(&(&record.chrom, record.start, record.end))
} else {
self.inner
.serialize(&(&record.chrom, record.start, record.end, &record.aux))
}
}
}
/// A single BED record: three mandatory columns plus any number of optional ones.
///
/// NOTE(review): the field order is presumably significant, since the derived
/// `Serialize`/`Deserialize` and `Ord` implementations follow it — confirm before reordering.
#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)]
pub struct Record {
/// Chromosome (reference sequence) name; column 0.
chrom: String,
/// Start position; column 1. Presumably 0-based per the BED convention — TODO confirm.
start: u64,
/// End position; column 2. Presumably exclusive per the BED convention — TODO confirm.
end: u64,
/// All remaining columns, stored as raw strings; `aux[0]` is column 3 (the name).
/// Defaults to an empty vector when the input row has only three columns.
#[serde(default)]
aux: Vec<String>,
}
impl Record {
    /// Creates an empty record: empty chromosome name, `start == end == 0`, and no
    /// auxiliary fields.
    pub fn new() -> Self {
        Record {
            chrom: String::new(),
            start: 0,
            end: 0,
            aux: Vec::new(),
        }
    }

    /// Chromosome name of the feature.
    pub fn chrom(&self) -> &str {
        &self.chrom
    }

    /// Start position of the feature.
    pub fn start(&self) -> u64 {
        self.start
    }

    /// End position of the feature.
    pub fn end(&self) -> u64 {
        self.end
    }

    /// Name of the feature (column 3), if present.
    pub fn name(&self) -> Option<&str> {
        self.aux(3)
    }

    /// Score of the feature (column 4), if present, as the raw string.
    pub fn score(&self) -> Option<&str> {
        self.aux(4)
    }

    /// Strand of the feature (column 5).
    ///
    /// Returns `None` when the column is missing or holds anything other than
    /// `"+"` or `"-"`.
    pub fn strand(&self) -> Option<strand::Strand> {
        match self.aux(5) {
            Some("+") => Some(strand::Strand::Forward),
            Some("-") => Some(strand::Strand::Reverse),
            _ => None,
        }
    }

    /// Returns the optional column at absolute column index `i`.
    ///
    /// Columns are numbered from the start of the row, so the first optional column
    /// (the name) is `i == 3`. Indices `0..=2` refer to the mandatory chrom/start/end
    /// columns, which are not stored as auxiliary fields; for those, and for indices
    /// past the last stored column, `None` is returned.
    pub fn aux(&self, i: usize) -> Option<&str> {
        // `i - 3` would underflow for i < 3 (a panic when overflow checks are on);
        // `checked_sub` maps those indices to `None` instead.
        let j = i.checked_sub(3)?;
        self.aux.get(j).map(String::as_str)
    }

    /// Sets the chromosome name.
    pub fn set_chrom(&mut self, chrom: &str) {
        self.chrom = chrom.to_owned();
    }

    /// Sets the start position.
    pub fn set_start(&mut self, start: u64) {
        self.start = start;
    }

    /// Sets the end position.
    pub fn set_end(&mut self, end: u64) {
        self.end = end;
    }

    /// Sets the name (column 3), adding the column if the record has no optional
    /// columns yet.
    pub fn set_name(&mut self, name: &str) {
        match self.aux.first_mut() {
            Some(slot) => *slot = name.to_owned(),
            None => self.aux.push(name.to_owned()),
        }
    }

    /// Sets the score (column 4).
    ///
    /// If the name column is missing it is first filled with an empty string so that
    /// the score lands in column 4.
    pub fn set_score(&mut self, score: &str) {
        if self.aux.is_empty() {
            self.aux.push(String::new());
        }
        match self.aux.get_mut(1) {
            Some(slot) => *slot = score.to_owned(),
            None => self.aux.push(score.to_owned()),
        }
    }

    /// Appends one more optional column after the ones already present.
    pub fn push_aux(&mut self, field: &str) {
        self.aux.push(field.to_owned());
    }
}
/// Converts a BED record into a contig annotation.
///
/// The contig starts at the record's start, spans `end - start`, and takes the record's
/// strand; a missing or unrecognised strand becomes `Strand::Unknown`.
///
/// NOTE(review): `end - start` is an unchecked `u64` subtraction, so a record whose end
/// precedes its start overflows here (a panic when overflow checks are enabled).
impl<'a> From<&'a Record> for annot::contig::Contig<String, strand::Strand> {
    fn from(rec: &Record) -> Self {
        let refid = rec.chrom.to_string();
        let first = rec.start as isize;
        let span = (rec.end - rec.start) as usize;
        let strand = rec.strand().unwrap_or(strand::Strand::Unknown);
        annot::contig::Contig::new(refid, first, span, strand)
    }
}
/// Converts a single-position annotation into a six-column BED record.
///
/// The record covers `[pos, pos + 1)`, has an empty name, a score of `"0"`, and the
/// strand symbol of the position.
impl<R, S> From<annot::pos::Pos<R, S>> for Record
where
    R: Deref<Target = str>,
    S: Into<strand::Strand> + Copy,
{
    fn from(pos: annot::pos::Pos<R, S>) -> Self {
        let at = pos.pos();
        let strand: strand::Strand = pos.strand().into();

        let mut bed = Record::new();
        bed.set_chrom(pos.refid());
        bed.set_start(at as u64);
        bed.set_end((at + 1) as u64);
        bed.set_name("");
        bed.set_score("0");
        bed.push_aux(strand.strand_symbol());
        bed
    }
}
/// Converts a contiguous annotation into a six-column BED record.
///
/// The record covers `[start, start + length)`, has an empty name, a score of `"0"`,
/// and the strand symbol of the contig.
impl<R, S> From<annot::contig::Contig<R, S>> for Record
where
    R: Deref<Target = str>,
    S: Into<strand::Strand> + Copy,
{
    fn from(contig: annot::contig::Contig<R, S>) -> Self {
        let first = contig.start();
        let past_end = first + contig.length() as isize;
        let strand: strand::Strand = contig.strand().into();

        let mut bed = Record::new();
        bed.set_chrom(contig.refid());
        bed.set_start(first as u64);
        bed.set_end(past_end as u64);
        bed.set_name("");
        bed.set_score("0");
        bed.push_aux(strand.strand_symbol());
        bed
    }
}
/// Converts a spliced annotation into a twelve-column BED record.
///
/// Columns 0-5 are chromosome, start, end, an empty name, a score of `"0"`, and the
/// strand symbol. Columns 6-11 are the start and end repeated, a literal `"0"`, the
/// exon count, and the comma-terminated exon lengths and exon starts. In the BED12
/// layout these slots are thickStart, thickEnd, itemRgb, blockCount, blockSizes and
/// blockStarts.
impl<R, S> From<annot::spliced::Spliced<R, S>> for Record
where
    R: Deref<Target = str>,
    S: Into<strand::Strand> + Copy,
{
    fn from(spliced: annot::spliced::Spliced<R, S>) -> Self {
        let first = spliced.start();
        let past_end = first + spliced.length() as isize;
        let strand: strand::Strand = spliced.strand().into();

        let mut bed = Record::new();
        bed.set_chrom(spliced.refid());
        bed.set_start(first as u64);
        bed.set_end(past_end as u64);
        bed.set_name("");
        bed.set_score("0");
        bed.push_aux(strand.strand_symbol());

        // Columns 6-9: start and end repeated, a constant "0", and the exon count.
        bed.push_aux(&first.to_string());
        bed.push_aux(&past_end.to_string());
        bed.push_aux("0");
        bed.push_aux(&spliced.exon_count().to_string());

        // Columns 10-11: every value is followed by a comma, including the last one.
        let block_sizes = spliced
            .exon_lengths()
            .into_iter()
            .fold(String::new(), |mut list, len| {
                write!(list, "{},", len).unwrap();
                list
            });
        bed.push_aux(&block_sizes);

        let block_starts = spliced
            .exon_starts()
            .into_iter()
            .fold(String::new(), |mut list, offset| {
                write!(list, "{},", offset).unwrap();
                list
            });
        bed.push_aux(&block_starts);

        bed
    }
}
// Unit tests for the BED reader, writer, and the conversions between `Record` and the
// `bio_types` annotation types.
#[cfg(test)]
mod tests {
use super::*;
use bio_types::annot::{contig::Contig, pos::Pos, spliced::Spliced};
use bio_types::strand::{ReqStrand, Strand};
// Two five-column rows (chrom, start, end, name, score) shared by the reader and writer tests.
const BED_FILE: &[u8] = b"1\t5\t5000\tname1\tup
2\t3\t5005\tname2\tup
";
// The same two rows as `BED_FILE`, interleaved with `#` comment lines.
const BED_FILE_COMMENT: &[u8] = b"\
# this line should be ignored
1\t5\t5000\tname1\tup
# and this one as well
2\t3\t5005\tname2\tup
";
// The same two features with only the three mandatory columns.
const BED_FILE_COMPACT: &[u8] = b"1\t5\t5000\n2\t3\t5005\n";
// Reads `BED_FILE` and checks every parsed field of each record.
#[test]
fn test_reader() {
let chroms = ["1", "2"];
let starts = [5, 3];
let ends = [5000, 5005];
let names = ["name1", "name2"];
let scores = ["up", "up"];
let mut reader = Reader::new(BED_FILE);
for (i, r) in reader.records().enumerate() {
let record = r.expect("Error reading record");
assert_eq!(record.chrom(), chroms[i]);
assert_eq!(record.start(), starts[i]);
assert_eq!(record.end(), ends[i]);
assert_eq!(record.name().expect("Error reading name"), names[i]);
assert_eq!(record.score().expect("Error reading score"), scores[i]);
}
}
// Same checks as `test_reader`, on input that contains `#` comment lines.
#[test]
fn test_reader_with_comment() {
let chroms = ["1", "2"];
let starts = [5, 3];
let ends = [5000, 5005];
let names = ["name1", "name2"];
let scores = ["up", "up"];
let mut reader = Reader::new(BED_FILE_COMMENT);
for (i, r) in reader.records().enumerate() {
let record = r.expect("Error reading record");
assert_eq!(record.chrom(), chroms[i]);
assert_eq!(record.start(), starts[i]);
assert_eq!(record.end(), ends[i]);
assert_eq!(record.name().expect("Error reading name"), names[i]);
assert_eq!(record.score().expect("Error reading score"), scores[i]);
}
}
// Rows with only chrom/start/end must still deserialize.
#[test]
fn test_reader_compact() {
let chroms = ["1", "2"];
let starts = [5, 3];
let ends = [5000, 5005];
let mut reader = Reader::new(BED_FILE_COMPACT);
for (i, r) in reader.records().enumerate() {
let record = r.unwrap();
assert_eq!(record.chrom(), chroms[i]);
assert_eq!(record.start(), starts[i]);
assert_eq!(record.end(), ends[i]);
}
}
// Opening a non-existent path must fail with the context message that names the path.
#[test]
fn test_reader_from_file_path_doesnt_exist_returns_err() {
let path = Path::new("/I/dont/exist.bed");
let error = Reader::from_file(path)
.unwrap_err()
.downcast::<String>()
.unwrap();
assert_eq!(&error, "Failed to read bed from \"/I/dont/exist.bed\"")
}
// Round trip: records read from `BED_FILE` must be written back byte-for-byte.
#[test]
fn test_writer() {
let mut reader = Reader::new(BED_FILE);
let mut writer = Writer::new(vec![]);
for r in reader.records() {
writer
.write(&r.expect("Error reading record"))
.expect("Error writing record");
}
assert_eq!(writer.inner.into_inner().unwrap(), BED_FILE);
}
// Converts three spliced annotations to records and checks the exact twelve-column output.
#[test]
fn spliced_to_bed() {
let tma20 = Spliced::with_lengths_starts(
"chrV".to_owned(),
166236,
&[535, 11],
&[0, 638],
ReqStrand::Reverse,
)
.unwrap();
let mut buf = Vec::new();
{
// Inner scope so the writer's mutable borrow of `buf` ends before `buf` is inspected.
let mut writer = Writer::new(&mut buf);
let mut tma20_bed = Record::from(tma20);
tma20_bed.set_name("YER007C-A");
writer.write(&tma20_bed).unwrap();
}
assert_eq!(
"chrV\t166236\t166885\tYER007C-A\t0\t-\t166236\t166885\t0\t2\t535,11,\t0,638,\n",
String::from_utf8(buf).unwrap().as_str()
);
let rpl7b = Spliced::with_lengths_starts(
"chrXVI".to_owned(),
173151,
&[11, 94, 630],
&[0, 420, 921],
ReqStrand::Forward,
)
.unwrap();
let mut buf = Vec::new();
{
let mut writer = Writer::new(&mut buf);
let mut rpl7b_bed = Record::from(rpl7b);
rpl7b_bed.set_name("YPL198W");
writer.write(&rpl7b_bed).unwrap();
}
assert_eq!(
"chrXVI\t173151\t174702\tYPL198W\t0\t+\t173151\t174702\t0\t3\t11,94,630,\t0,420,921,\n",
String::from_utf8(buf).unwrap().as_str()
);
let tad3 = Spliced::with_lengths_starts(
"chrXII".to_owned(),
765265,
&[808, 52, 109],
&[0, 864, 984],
ReqStrand::Reverse,
)
.unwrap();
let mut buf = Vec::new();
{
let mut writer = Writer::new(&mut buf);
let mut tad3_bed = Record::from(tad3);
tad3_bed.set_name("YLR316C");
writer.write(&tad3_bed).unwrap();
}
assert_eq!("chrXII\t765265\t766358\tYLR316C\t0\t-\t765265\t766358\t0\t3\t808,52,109,\t0,864,984,\n",
String::from_utf8(buf).unwrap().as_str());
}
// A contig converts to a record with an empty name, score "0", and the contig's strand.
#[test]
fn test_bed_from_contig() {
let contig = Contig::new(
"chrXI".to_owned(),
334412,
334916 - 334412,
ReqStrand::Reverse,
);
let record = Record::from(contig);
assert_eq!(record.chrom(), String::from("chrXI"));
assert_eq!(record.start(), 334412);
assert_eq!(record.end(), 334412 + (334916 - 334412));
assert_eq!(record.name(), Some(""));
assert_eq!(record.score(), Some("0"));
assert_eq!(record.strand(), Some(Strand::Reverse));
}
// A single position converts to a one-base record ending at `pos + 1`.
#[test]
fn test_bed_from_pos() {
let pos = Pos::new("chrXI".to_owned(), 334412, ReqStrand::Reverse);
let record = Record::from(pos);
assert_eq!(record.chrom(), String::from("chrXI"));
assert_eq!(record.start(), 334412);
assert_eq!(record.end(), 334412 + 1);
assert_eq!(record.name(), Some(""));
assert_eq!(record.score(), Some("0"));
assert_eq!(record.strand(), Some(Strand::Reverse));
}
}