use core::fmt;
use cyanea_core::{CyaneaError, Result};
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Strand {
Forward,
Reverse,
Unknown,
}
impl Strand {
pub fn is_forward(&self) -> bool {
matches!(self, Strand::Forward)
}
pub fn is_reverse(&self) -> bool {
matches!(self, Strand::Reverse)
}
}
impl fmt::Display for Strand {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Strand::Forward => write!(f, "+"),
Strand::Reverse => write!(f, "-"),
Strand::Unknown => write!(f, "."),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct GenomicPosition {
pub chrom: String,
pub position: u64,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct GenomicInterval {
pub chrom: String,
pub start: u64,
pub end: u64,
pub strand: Strand,
}
impl GenomicInterval {
pub fn new(chrom: impl Into<String>, start: u64, end: u64) -> Result<Self> {
Self::with_strand(chrom, start, end, Strand::Unknown)
}
pub fn with_strand(
chrom: impl Into<String>,
start: u64,
end: u64,
strand: Strand,
) -> Result<Self> {
if start >= end {
return Err(CyaneaError::InvalidInput(format!(
"interval start ({start}) must be less than end ({end})"
)));
}
Ok(Self {
chrom: chrom.into(),
start,
end,
strand,
})
}
pub fn len(&self) -> u64 {
self.end - self.start
}
pub fn is_empty(&self) -> bool {
self.start == self.end
}
pub fn contains(&self, position: u64) -> bool {
position >= self.start && position < self.end
}
pub fn overlaps(&self, other: &GenomicInterval) -> bool {
self.chrom == other.chrom && self.start < other.end && other.start < self.end
}
pub fn intersect(&self, other: &GenomicInterval) -> Option<GenomicInterval> {
if !self.overlaps(other) {
return None;
}
let start = self.start.max(other.start);
let end = self.end.min(other.end);
Some(GenomicInterval {
chrom: self.chrom.clone(),
start,
end,
strand: self.strand,
})
}
pub fn merge(&self, other: &GenomicInterval) -> Option<GenomicInterval> {
if !self.overlaps(other) {
return None;
}
let start = self.start.min(other.start);
let end = self.end.max(other.end);
Some(GenomicInterval {
chrom: self.chrom.clone(),
start,
end,
strand: self.strand,
})
}
pub fn distance(&self, other: &GenomicInterval) -> Option<u64> {
if self.chrom != other.chrom {
return None;
}
if self.overlaps(other) {
return Some(0);
}
if self.end <= other.start {
Some(other.start - self.end)
} else {
Some(self.start - other.end)
}
}
pub fn midpoint(&self) -> u64 {
(self.start + self.end) / 2
}
}
impl fmt::Display for GenomicInterval {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{}:{}-{}({})",
self.chrom, self.start, self.end, self.strand
)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_strand_display() {
assert_eq!(Strand::Forward.to_string(), "+");
assert_eq!(Strand::Reverse.to_string(), "-");
assert_eq!(Strand::Unknown.to_string(), ".");
}
#[test]
fn test_strand_predicates() {
assert!(Strand::Forward.is_forward());
assert!(!Strand::Forward.is_reverse());
assert!(Strand::Reverse.is_reverse());
assert!(!Strand::Unknown.is_forward());
}
#[test]
fn test_interval_new() {
let iv = GenomicInterval::new("chr1", 100, 200).unwrap();
assert_eq!(iv.chrom, "chr1");
assert_eq!(iv.start, 100);
assert_eq!(iv.end, 200);
assert_eq!(iv.strand, Strand::Unknown);
}
#[test]
fn test_interval_invalid() {
assert!(GenomicInterval::new("chr1", 200, 100).is_err());
assert!(GenomicInterval::new("chr1", 100, 100).is_err());
}
#[test]
fn test_interval_with_strand() {
let iv = GenomicInterval::with_strand("chr1", 0, 50, Strand::Forward).unwrap();
assert_eq!(iv.strand, Strand::Forward);
}
#[test]
fn test_interval_len() {
let iv = GenomicInterval::new("chr1", 100, 300).unwrap();
assert_eq!(iv.len(), 200);
}
#[test]
fn test_interval_contains() {
let iv = GenomicInterval::new("chr1", 100, 200).unwrap();
assert!(iv.contains(100));
assert!(iv.contains(150));
assert!(iv.contains(199));
assert!(!iv.contains(200));
assert!(!iv.contains(99));
}
#[test]
fn test_interval_overlaps() {
let a = GenomicInterval::new("chr1", 100, 200).unwrap();
let b = GenomicInterval::new("chr1", 150, 250).unwrap();
let c = GenomicInterval::new("chr1", 200, 300).unwrap();
let d = GenomicInterval::new("chr2", 100, 200).unwrap();
assert!(a.overlaps(&b));
assert!(b.overlaps(&a));
assert!(!a.overlaps(&c)); assert!(!a.overlaps(&d)); }
#[test]
fn test_interval_intersect() {
let a = GenomicInterval::new("chr1", 100, 200).unwrap();
let b = GenomicInterval::new("chr1", 150, 250).unwrap();
let isect = a.intersect(&b).unwrap();
assert_eq!(isect.start, 150);
assert_eq!(isect.end, 200);
let c = GenomicInterval::new("chr1", 200, 300).unwrap();
assert!(a.intersect(&c).is_none());
}
#[test]
fn test_interval_merge() {
let a = GenomicInterval::new("chr1", 100, 200).unwrap();
let b = GenomicInterval::new("chr1", 150, 250).unwrap();
let merged = a.merge(&b).unwrap();
assert_eq!(merged.start, 100);
assert_eq!(merged.end, 250);
let c = GenomicInterval::new("chr1", 300, 400).unwrap();
assert!(a.merge(&c).is_none());
}
#[test]
fn test_interval_distance() {
let a = GenomicInterval::new("chr1", 100, 200).unwrap();
let b = GenomicInterval::new("chr1", 300, 400).unwrap();
let c = GenomicInterval::new("chr2", 100, 200).unwrap();
let d = GenomicInterval::new("chr1", 150, 250).unwrap();
assert_eq!(a.distance(&b), Some(100));
assert_eq!(b.distance(&a), Some(100));
assert_eq!(a.distance(&c), None);
assert_eq!(a.distance(&d), Some(0));
}
#[test]
fn test_interval_midpoint() {
let iv = GenomicInterval::new("chr1", 100, 200).unwrap();
assert_eq!(iv.midpoint(), 150);
}
#[test]
fn test_interval_display() {
let iv = GenomicInterval::with_strand("chr1", 100, 200, Strand::Forward).unwrap();
assert_eq!(iv.to_string(), "chr1:100-200(+)");
}
}