use crate::error::{Error, Result};
/// A single coordinate on a named sequence.
///
/// The derived ordering compares `chrom` first (plain string comparison, so
/// `"chr10"` sorts before `"chr2"`) and then `pos`, following field order.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct GenomicPosition {
    /// Name of the chromosome / sequence the coordinate belongs to.
    pub chrom: String,
    /// Numeric coordinate on `chrom`.
    // NOTE(review): nothing here fixes whether this is 0-based or 1-based — confirm with callers.
    pub pos: u64,
}

impl GenomicPosition {
    /// Creates a position from any value convertible into an owned
    /// chromosome name, together with its coordinate.
    pub fn new(chrom: impl Into<String>, pos: u64) -> Self {
        let chrom: String = chrom.into();
        GenomicPosition { chrom, pos }
    }
}
/// A span on a named sequence, delimited by `start` and `end`.
///
/// [`GenomicInterval::new`] rejects `start > end`, but the fields are public
/// and may be modified afterwards, so the query methods below do not assume
/// that ordering still holds.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct GenomicInterval {
    /// Name of the chromosome / sequence the span lies on.
    pub chrom: String,
    /// Lower bound of the span.
    pub start: u64,
    /// Upper bound of the span.
    pub end: u64,
}

impl GenomicInterval {
    /// Creates an interval after checking that the bounds are ordered.
    ///
    /// # Errors
    ///
    /// Returns [`Error::InvalidInput`] when `start` is greater than `end`.
    /// Equal bounds are accepted and produce an empty interval.
    pub fn new(chrom: impl Into<String>, start: u64, end: u64) -> Result<Self> {
        let chrom = chrom.into();
        if start > end {
            return Err(Error::InvalidInput(format!(
                "Start position {} is greater than end position {}",
                start, end
            )));
        }
        Ok(Self { chrom, start, end })
    }

    /// Returns `true` when `pos` is on the same chromosome and satisfies
    /// `start < pos.pos <= end`.
    // NOTE(review): the lower bound is exclusive and the upper bound inclusive.
    // That matches testing a 1-based position against a 0-based half-open
    // interval, but the intended coordinate convention is not stated in this
    // file — confirm before relying on boundary behaviour.
    pub fn contains(&self, pos: &GenomicPosition) -> bool {
        self.chrom == pos.chrom && self.start < pos.pos && pos.pos <= self.end
    }

    /// Returns `end - start`, or `0` when `start` exceeds `end`.
    ///
    /// The subtraction saturates because the public fields can be edited into
    /// an inverted state after construction; a plain `end - start` would then
    /// panic in debug builds and wrap around in release builds.
    pub fn len(&self) -> u64 {
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` when the interval has zero length (see [`Self::len`]),
    /// which includes intervals whose bounds have been inverted.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
/// Pull-style source of records in which every fetch may fail.
pub trait GenomicRecordIterator {
    /// Type produced by [`next_record`](Self::next_record).
    type Record;

    /// Fetches the next record.
    ///
    /// The provided methods treat `Ok(None)` as the end of the input.
    fn next_record(&mut self) -> Result<Option<Self::Record>>;

    /// Fetches the next item as a byte buffer.
    // NOTE(review): presumably the unparsed form of a record — confirm with implementors.
    fn next_raw(&mut self) -> Result<Option<Vec<u8>>>;

    /// Consumes the iterator and gathers every remaining record into a `Vec`.
    ///
    /// # Errors
    ///
    /// Stops at the first error reported by `next_record` and returns it;
    /// records gathered up to that point are discarded.
    fn collect_all(mut self) -> Result<Vec<Self::Record>>
    where
        Self: Sized,
    {
        let mut gathered = Vec::new();
        loop {
            match self.next_record()? {
                Some(item) => gathered.push(item),
                None => return Ok(gathered),
            }
        }
    }

    /// Consumes the iterator and wraps it in a [`ChunkedIterator`] configured
    /// with `chunk_size`.
    fn chunks(self, chunk_size: usize) -> ChunkedIterator<Self>
    where
        Self: Sized,
    {
        let inner = self;
        ChunkedIterator { inner, chunk_size }
    }
}
/// Adapter that groups the records of a [`GenomicRecordIterator`] into `Vec`s
/// holding at most `chunk_size` records each.
pub struct ChunkedIterator<I> {
    /// Underlying record source.
    inner: I,
    /// Upper bound on the number of records in one yielded chunk.
    chunk_size: usize,
}

impl<I> Iterator for ChunkedIterator<I>
where
    I: GenomicRecordIterator,
{
    type Item = Result<Vec<I::Record>>;

    /// Pulls up to `chunk_size` records from the inner source.
    ///
    /// * `Some(Ok(chunk))` — a non-empty chunk; it is shorter than
    ///   `chunk_size` only when the source reported `Ok(None)` part-way.
    /// * `Some(Err(e))` — the source failed; records already gathered for the
    ///   current chunk are dropped.
    /// * `None` — no record could be gathered. With `chunk_size == 0` this is
    ///   returned immediately, without touching the source.
    fn next(&mut self) -> Option<Self::Item> {
        let limit = self.chunk_size;
        let mut batch = Vec::with_capacity(limit);

        while batch.len() < limit {
            match self.inner.next_record() {
                Ok(Some(item)) => batch.push(item),
                Ok(None) => break,
                // Early return: `batch` and whatever it holds is dropped here.
                Err(err) => return Some(Err(err)),
            }
        }

        if batch.is_empty() {
            return None;
        }
        Some(Ok(batch))
    }
}
/// A [`GenomicRecordIterator`] that can also hand out a metadata value.
pub trait GenomicReader: GenomicRecordIterator {
/// Type of the value returned by [`metadata`](Self::metadata).
type Metadata;
/// Returns a shared reference to the reader's metadata.
// NOTE(review): presumably header-level information about the underlying input — confirm with implementors.
fn metadata(&self) -> &Self::Metadata;
}
/// Sink that accepts records one at a time or as a slice.
pub trait GenomicWriter {
    /// Type of record this writer accepts.
    type Record;

    /// Writes a single record.
    fn write_record(&mut self, record: &Self::Record) -> Result<()>;

    /// Writes every record of `records` in order via
    /// [`write_record`](Self::write_record).
    ///
    /// # Errors
    ///
    /// Stops at the first failing record and returns its error; records
    /// before it have already been written, records after it are not
    /// attempted. This method does not call [`flush`](Self::flush).
    fn write_records(&mut self, records: &[Self::Record]) -> Result<()> {
        records
            .iter()
            .try_for_each(|record| self.write_record(record))
    }

    /// Flushes the writer.
    // NOTE(review): what "flush" entails is up to the implementor — nothing in this trait constrains it.
    fn flush(&mut self) -> Result<()>;
}
/// Strand orientation, convertible to and from a one-character symbol.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Strand {
    /// Written as `'+'`.
    Forward,
    /// Written as `'-'`.
    Reverse,
    /// Written as `'.'`; also the result of parsing any unrecognised symbol.
    Unknown,
}

impl Strand {
    /// Maps `'+'` to [`Strand::Forward`], `'-'` to [`Strand::Reverse`] and
    /// every other character to [`Strand::Unknown`]. Never fails.
    pub fn from_char(c: char) -> Self {
        if c == '+' {
            Self::Forward
        } else if c == '-' {
            Self::Reverse
        } else {
            Self::Unknown
        }
    }

    /// Returns the one-character symbol of this strand: `'+'`, `'-'` or `'.'`.
    pub fn to_char(self) -> char {
        match self {
            Self::Forward => '+',
            Self::Reverse => '-',
            Self::Unknown => '.',
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The constructor stores both arguments unchanged.
    #[test]
    fn test_genomic_position() {
        let position = GenomicPosition::new("chr1", 12345);
        assert_eq!(position.chrom, "chr1");
        assert_eq!(position.pos, 12345);
    }

    /// Length, emptiness and membership for a well-formed interval.
    #[test]
    fn test_genomic_interval() {
        let span = GenomicInterval::new("chr1", 1000, 2000).unwrap();
        assert_eq!(span.len(), 1000);
        assert!(!span.is_empty());

        let within = GenomicPosition::new("chr1", 1500);
        let beyond = GenomicPosition::new("chr1", 3000);
        assert!(span.contains(&within));
        assert!(!span.contains(&beyond));
    }

    /// A start greater than the end must be rejected.
    #[test]
    fn test_invalid_interval() {
        assert!(GenomicInterval::new("chr1", 2000, 1000).is_err());
    }

    /// Symbol parsing for all three variants, plus one rendering check.
    #[test]
    fn test_strand() {
        let parsed = [
            ('+', Strand::Forward),
            ('-', Strand::Reverse),
            ('.', Strand::Unknown),
        ];
        for (symbol, expected) in parsed {
            assert_eq!(Strand::from_char(symbol), expected);
        }
        assert_eq!(Strand::Forward.to_char(), '+');
    }
}