use crate::error::{Error, Result};
use std::cmp::{max, min};
use std::fmt;
pub mod conversions;
pub mod annotation;
/// A span of coordinates on a named chromosome (or other sequence).
///
/// The methods on this type treat the span as half-open, `[start, end)`:
/// `len()` is `end - start`, and `contains_pos` accepts `start` but rejects
/// `end`. The constructors `new` and `try_new` reject `start > end`, but the
/// fields are public, so code that writes to them directly is responsible
/// for keeping `start <= end`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct GenomicInterval {
/// Name of the chromosome/sequence; compared as a plain string.
pub chrom: String,
/// First position covered by the interval (inclusive).
pub start: u64,
/// Position just past the last covered one (exclusive).
pub end: u64,
}
impl GenomicInterval {
pub fn new<S: Into<String>>(chrom: S, start: u64, end: u64) -> Self {
let interval = Self {
chrom: chrom.into(),
start,
end,
};
assert!(
start <= end,
"Invalid interval: start ({}) > end ({})",
start,
end
);
interval
}
pub fn try_new<S: Into<String>>(chrom: S, start: u64, end: u64) -> Result<Self> {
if start > end {
return Err(Error::InvalidInput(format!(
"Invalid interval: start ({}) > end ({})",
start, end
)));
}
Ok(Self {
chrom: chrom.into(),
start,
end,
})
}
pub fn point<S: Into<String>>(chrom: S, pos: u64) -> Self {
Self {
chrom: chrom.into(),
start: pos,
end: pos + 1,
}
}
#[inline]
pub fn len(&self) -> u64 {
self.end - self.start
}
#[inline]
pub fn is_empty(&self) -> bool {
self.start == self.end
}
#[inline]
pub fn is_point(&self) -> bool {
self.len() == 1
}
#[inline]
pub fn overlaps(&self, other: &GenomicInterval) -> bool {
self.chrom == other.chrom && self.start < other.end && other.start < self.end
}
#[inline]
pub fn contains(&self, other: &GenomicInterval) -> bool {
self.chrom == other.chrom && self.start <= other.start && other.end <= self.end
}
#[inline]
pub fn contains_pos(&self, chrom: &str, pos: u64) -> bool {
self.chrom == chrom && self.start <= pos && pos < self.end
}
pub fn overlap_length(&self, other: &GenomicInterval) -> u64 {
if !self.overlaps(other) {
0
} else {
min(self.end, other.end) - max(self.start, other.start)
}
}
pub fn intersect(&self, other: &GenomicInterval) -> Option<GenomicInterval> {
if !self.overlaps(other) {
None
} else {
Some(GenomicInterval {
chrom: self.chrom.clone(),
start: max(self.start, other.start),
end: min(self.end, other.end),
})
}
}
pub fn union(&self, other: &GenomicInterval) -> Option<GenomicInterval> {
if self.chrom != other.chrom {
None
} else {
Some(GenomicInterval {
chrom: self.chrom.clone(),
start: min(self.start, other.start),
end: max(self.end, other.end),
})
}
}
pub fn distance(&self, other: &GenomicInterval) -> Option<u64> {
if self.chrom != other.chrom {
None
} else if self.overlaps(other) {
Some(0)
} else if self.end <= other.start {
Some(other.start - self.end)
} else {
Some(self.start - other.end)
}
}
pub fn expand(&self, amount: u64) -> GenomicInterval {
GenomicInterval {
chrom: self.chrom.clone(),
start: self.start.saturating_sub(amount),
end: self.end + amount,
}
}
pub fn shrink(&self, amount: u64) -> Option<GenomicInterval> {
let new_start = self.start + amount;
let new_end = self.end.saturating_sub(amount);
if new_start <= new_end {
Some(GenomicInterval {
chrom: self.chrom.clone(),
start: new_start,
end: new_end,
})
} else {
None
}
}
pub fn to_bed_string(&self) -> String {
format!("{}\t{}\t{}", self.chrom, self.start, self.end)
}
pub fn from_bed_string(s: &str) -> Result<Self> {
let parts: Vec<&str> = s.split('\t').collect();
if parts.len() < 3 {
return Err(Error::Parse(format!(
"Invalid BED string: expected at least 3 fields, got {}",
parts.len()
)));
}
let chrom = parts[0].to_string();
let start = parts[1].parse::<u64>().map_err(|_| {
Error::Parse(format!("Invalid start position: {}", parts[1]))
})?;
let end = parts[2].parse::<u64>().map_err(|_| {
Error::Parse(format!("Invalid end position: {}", parts[2]))
})?;
Self::try_new(chrom, start, end)
}
}
/// Renders the interval as a `chrom:start-end` region string.
impl fmt::Display for GenomicInterval {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { chrom, start, end } = self;
        f.write_fmt(format_args!("{}:{}-{}", chrom, start, end))
    }
}
/// Partial ordering that always agrees with the total order from `Ord`.
impl PartialOrd for GenomicInterval {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        // Delegate so the two orderings can never disagree.
        let ordering = Ord::cmp(self, other);
        Some(ordering)
    }
}
/// Total order: by chromosome name, then `start`, then `end`.
///
/// Chromosome names are compared as strings, so `"chr10"` sorts before
/// `"chr2"`.
impl Ord for GenomicInterval {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.chrom
            .cmp(&other.chrom)
            .then_with(|| self.start.cmp(&other.start))
            .then_with(|| self.end.cmp(&other.end))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for an interval on `chr1`; most cases only vary coordinates.
    fn chr1(start: u64, end: u64) -> GenomicInterval {
        GenomicInterval::new("chr1", start, end)
    }

    #[test]
    fn test_new() {
        let iv = GenomicInterval::new("chr1", 100, 200);
        assert_eq!(iv.chrom, "chr1");
        assert_eq!((iv.start, iv.end), (100, 200));
        assert_eq!(iv.len(), 100);
    }

    #[test]
    #[should_panic]
    fn test_invalid_interval() {
        // start > end must be rejected by the panicking constructor.
        let _ = GenomicInterval::new("chr1", 200, 100);
    }

    #[test]
    fn test_try_new() {
        let inverted = GenomicInterval::try_new("chr1", 200, 100);
        assert!(inverted.is_err());
        let valid = GenomicInterval::try_new("chr1", 100, 200);
        assert!(valid.is_ok());
    }

    #[test]
    fn test_point() {
        let single = GenomicInterval::point("chr1", 100);
        assert_eq!((single.start, single.end), (100, 101));
        assert!(single.is_point());
    }

    #[test]
    fn test_overlaps() {
        let base = chr1(100, 200);
        let overlapping = chr1(150, 250);
        let adjacent = chr1(200, 300);
        let other_chrom = GenomicInterval::new("chr2", 100, 200);

        assert!(base.overlaps(&overlapping));
        // Touching intervals share no position.
        assert!(!base.overlaps(&adjacent));
        // Same coordinates on another chromosome never overlap.
        assert!(!base.overlaps(&other_chrom));
    }

    #[test]
    fn test_contains() {
        let outer = chr1(100, 300);
        let inner = chr1(150, 200);
        assert!(outer.contains(&inner));
        assert!(!inner.contains(&outer));
    }

    #[test]
    fn test_contains_pos() {
        let iv = chr1(100, 200);
        assert!(iv.contains_pos("chr1", 100));
        assert!(iv.contains_pos("chr1", 150));
        // The end coordinate is exclusive.
        assert!(!iv.contains_pos("chr1", 200));
        assert!(!iv.contains_pos("chr2", 150));
    }

    #[test]
    fn test_overlap_length() {
        let base = chr1(100, 200);
        let overlapping = chr1(150, 250);
        let adjacent = chr1(200, 300);
        assert_eq!(base.overlap_length(&overlapping), 50);
        assert_eq!(base.overlap_length(&adjacent), 0);
    }

    #[test]
    fn test_intersect() {
        let base = chr1(100, 200);
        let overlapping = chr1(150, 250);
        let shared = base.intersect(&overlapping).unwrap();
        assert_eq!((shared.start, shared.end), (150, 200));

        let adjacent = chr1(200, 300);
        assert!(base.intersect(&adjacent).is_none());
    }

    #[test]
    fn test_union() {
        let left = chr1(100, 200);
        let right = chr1(300, 400);
        let span = left.union(&right).unwrap();
        assert_eq!((span.start, span.end), (100, 400));

        let other_chrom = GenomicInterval::new("chr2", 100, 200);
        assert!(left.union(&other_chrom).is_none());
    }

    #[test]
    fn test_distance() {
        let base = chr1(100, 200);
        let downstream = chr1(250, 300);
        let overlapping = chr1(150, 250);
        assert_eq!(base.distance(&downstream), Some(50));
        assert_eq!(base.distance(&overlapping), Some(0));
    }

    #[test]
    fn test_expand() {
        let grown = chr1(100, 200).expand(50);
        assert_eq!((grown.start, grown.end), (50, 250));

        // Expanding past the origin clamps the start at zero.
        let clamped = chr1(10, 20).expand(50);
        assert_eq!(clamped.start, 0);
    }

    #[test]
    fn test_shrink() {
        let iv = chr1(100, 200);
        let trimmed = iv.shrink(25).unwrap();
        assert_eq!((trimmed.start, trimmed.end), (125, 175));
        assert!(iv.shrink(100).is_none());
    }

    #[test]
    fn test_bed_string() {
        let iv = chr1(100, 200);
        assert_eq!(iv.to_bed_string(), "chr1\t100\t200");

        let round_trip = GenomicInterval::from_bed_string("chr1\t100\t200").unwrap();
        assert_eq!(round_trip, iv);
    }

    #[test]
    fn test_display() {
        let iv = chr1(100, 200);
        assert_eq!(format!("{}", iv), "chr1:100-200");
    }

    #[test]
    fn test_sorting() {
        let mut intervals = vec![
            GenomicInterval::new("chr2", 100, 200),
            chr1(300, 400),
            chr1(100, 200),
        ];
        intervals.sort();

        let order: Vec<(&str, u64)> = intervals
            .iter()
            .map(|iv| (iv.chrom.as_str(), iv.start))
            .collect();
        assert_eq!(order[0], ("chr1", 100));
        assert_eq!(order[1], ("chr1", 300));
        assert_eq!(order[2].0, "chr2");
    }
}