use omics::coordinate;
use omics::coordinate::interbase::Coordinate;
use omics::coordinate::interval::interbase::Interval;
use omics::coordinate::position::Number;
use thiserror::Error;
use tracing_log::log::warn;
use crate::alignment::Section;
use crate::alignment::section::data;
use crate::alignment::section::data::Record;
use crate::alignment::section::header::sequence;
use crate::liftover::stepthrough::interval_pair::ContiguousIntervalPair;
pub mod interval_pair;
#[derive(Debug, Error)]
pub enum Error {
#[error("interval stepthrough out of bounds: moving {0} from {1} by {2}")]
IntervalStepthroughOutOfBounds(String, Coordinate, Number),
#[error("interval error: {0}")]
Interval(coordinate::interval::Error),
#[error("invalid interval pair: {0}")]
InvalidIntervalPair(interval_pair::Error),
#[error("misaligned data section, which indicates a malformed chain file")]
MisalignedDataSection,
#[error("sequence error: {0}")]
Sequence(sequence::Error),
}
pub struct StepThroughWithData {
reference_pointer: Coordinate,
expected_reference_end: Coordinate,
query_pointer: Coordinate,
expected_query_end: Coordinate,
data: Box<dyn Iterator<Item = Record>>,
finished: bool,
}
impl std::fmt::Debug for StepThroughWithData {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("StepThroughWithData")
.field("expected_query_end", &self.expected_query_end)
.field("expected_reference_end", &self.expected_reference_end)
.field("query_pointer", &self.query_pointer)
.field("reference_pointer", &self.reference_pointer)
.finish()
}
}
impl StepThroughWithData {
pub(crate) fn new(section: &Section) -> Result<Self, Error> {
let (reference_pointer, expected_reference_end) = section
.header()
.reference_sequence()
.interval()
.map_err(Error::Sequence)?
.into_coordinates();
let (query_pointer, expected_query_end) = section
.header()
.query_sequence()
.interval()
.map_err(Error::Sequence)?
.into_coordinates();
let data = Box::new(section.data().clone().into_iter());
Ok(Self {
query_pointer,
reference_pointer,
expected_query_end,
expected_reference_end,
data,
finished: false,
})
}
fn finish(&mut self) -> Result<(), Error> {
if self.reference_pointer != self.expected_reference_end {
return Err(Error::MisalignedDataSection);
}
if self.query_pointer != self.expected_query_end {
return Err(Error::MisalignedDataSection);
}
self.finished = true;
Ok(())
}
}
impl Drop for StepThroughWithData {
fn drop(&mut self) {
if !self.finished && !std::thread::panicking() {
warn!("a step-through has not been properly finished before dropping!")
}
}
}
impl Iterator for StepThroughWithData {
type Item = Result<(ContiguousIntervalPair, data::Record), Error>;
fn next(&mut self) -> Option<Self::Item> {
let chunk = match self.data.next() {
Some(c) => c,
None => match self.finish() {
Ok(_) => return None,
Err(e) => return Some(Err(e)),
},
};
let reference_start = self.reference_pointer.clone();
if !self.reference_pointer.move_forward(chunk.size()) {
return Some(Err(Error::IntervalStepthroughOutOfBounds(
String::from("reference forward by chunk size"),
self.reference_pointer.clone(),
chunk.size(),
)));
}
let reference_end = self.reference_pointer.clone();
let reference = match Interval::try_new(reference_start, reference_end) {
Ok(interval) => interval,
Err(err) => return Some(Err(Error::Interval(err))),
};
let query_start = self.query_pointer.clone();
if !self.query_pointer.move_forward(chunk.size()) {
return Some(Err(Error::IntervalStepthroughOutOfBounds(
String::from("query forward by chunk size"),
self.query_pointer.clone(),
chunk.size(),
)));
}
let query_end = self.query_pointer.clone();
let query = match Interval::try_new(query_start, query_end) {
Ok(interval) => interval,
Err(err) => return Some(Err(Error::Interval(err))),
};
if let Some(dq) = chunk.dq() {
if !self.query_pointer.move_forward(dq) {
return Some(Err(Error::IntervalStepthroughOutOfBounds(
String::from("query forward by dq"),
self.query_pointer.clone(),
dq,
)));
}
}
if let Some(dt) = chunk.dt() {
if !self.reference_pointer.move_forward(dt) {
return Some(Err(Error::IntervalStepthroughOutOfBounds(
String::from("reference forward by dt"),
self.reference_pointer.clone(),
dt,
)));
}
}
let pair = match ContiguousIntervalPair::try_new(reference, query) {
Ok(p) => p,
Err(err) => return Some(Err(Error::InvalidIntervalPair(err))),
};
Some(Ok((pair, chunk)))
}
}
#[derive(Debug)]
pub struct StepThrough(StepThroughWithData);
impl StepThrough {
pub(crate) fn new(section: &Section) -> Result<Self, Error> {
StepThroughWithData::new(section).map(Self)
}
}
impl Iterator for StepThrough {
type Item = Result<ContiguousIntervalPair, Error>;
fn next(&mut self) -> Option<Self::Item> {
Some(self.0.next()?.map(|(pair, _)| pair))
}
}
#[cfg(test)]
mod tests {
use omics::coordinate::Contig;
use omics::coordinate::Strand;
use omics::coordinate::interbase::Coordinate;
use omics::coordinate::interval::interbase::Interval;
#[test]
fn test_it_correctly_steps_through_a_valid_chain_file() -> Result<(), Box<dyn std::error::Error>>
{
let data = b"chain 0 seq0 4 + 0 4 seq0 5 - 0 5 1\n3\t0\t1\n1";
let mut reader = crate::Reader::new(&data[..]);
let mut results = reader.sections().map(|x| x.unwrap()).collect::<Vec<_>>();
assert_eq!(results.len(), 1);
let section = results.pop().unwrap();
let mut stepthrough = section.stepthrough()?;
let result = stepthrough.next().unwrap().unwrap();
let reference = Interval::try_new(
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Positive, 0u64),
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Positive, 3u64),
)?;
let query = Interval::try_new(
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Negative, 5u64),
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Negative, 2u64),
)?;
assert_eq!(result.reference(), &reference);
assert_eq!(result.query(), &query);
let result = stepthrough.next().unwrap().unwrap();
let reference = Interval::try_new(
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Positive, 3u64),
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Positive, 4u64),
)?;
let query = Interval::try_new(
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Negative, 1u64),
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Negative, 0u64),
)?;
assert_eq!(result.reference(), &reference);
assert_eq!(result.query(), &query);
assert!(stepthrough.next().is_none());
Ok(())
}
#[test]
fn test_it_errors_when_there_is_an_alignment_issue() -> Result<(), Box<dyn std::error::Error>> {
let data = b"chain 0 seq0 4 + 0 3 seq0 9 - 0 9 1\n2\t0\t1\n1";
let mut reader = crate::Reader::new(&data[..]);
let mut results = reader.sections().map(|x| x.unwrap()).collect::<Vec<_>>();
assert_eq!(results.len(), 1);
let section = results.pop().unwrap();
let mut stepthrough = section.stepthrough()?;
let result = stepthrough.next().unwrap().unwrap();
let reference = Interval::try_new(
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Positive, 0u64),
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Positive, 2u64),
)?;
let query = Interval::try_new(
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Negative, 9u64),
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Negative, 7u64),
)?;
assert_eq!(result.reference(), &reference);
assert_eq!(result.query(), &query);
let result = stepthrough.next().unwrap().unwrap();
let reference = Interval::try_new(
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Positive, 2u64),
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Positive, 3u64),
)?;
let query = Interval::try_new(
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Negative, 6u64),
Coordinate::new(Contig::new_unchecked("seq0"), Strand::Negative, 5u64),
)?;
assert_eq!(result.reference(), &reference);
assert_eq!(result.query(), &query);
let err = stepthrough.next().unwrap().unwrap_err();
assert_eq!(
err.to_string(),
String::from("misaligned data section, which indicates a malformed chain file")
);
Ok(())
}
}