use serde::{Deserialize, Serialize};
use std::ops::Range;
use crate::{
error::GRangesError,
traits::{AdjustableGenericRange, GenericRange, GenericRangeOperations, IndexedDataContainer},
Position,
};
pub mod coitrees;
pub mod operations;
pub mod vec;
/// A bare range described only by its `start` and `end` positions.
///
/// The `GenericRange::index` implementation for this type in this file always
/// returns `None`, i.e. the range carries no index into a data container.
#[derive(Clone, Debug, Default, PartialEq, Deserialize, Serialize)]
pub struct RangeEmpty {
/// Start position of the range.
pub start: Position,
/// End position of the range; `RangeEmpty::new` asserts `end > start`.
/// Presumably exclusive (the tests expect `RangeEmpty::new(5, 8).width() == 3`).
pub end: Position,
}
// SAFETY: NOTE(review) — `RangeEmpty` holds only two `Position` fields. Assuming
// `Position` is a plain integer type, sharing or sending the struct across
// threads is sound, and these manual impls are likely redundant with the
// automatically derived `Send`/`Sync` — TODO confirm against `Position`.
unsafe impl Sync for RangeEmpty {}
unsafe impl Send for RangeEmpty {}
impl RangeEmpty {
pub fn new(start: Position, end: Position) -> Self {
assert!(end > start);
Self { start, end }
}
}
impl GenericRange for RangeEmpty {
    /// Returns the stored start position.
    fn start(&self) -> Position {
        self.start
    }

    /// Returns the stored end position.
    fn end(&self) -> Position {
        self.end
    }

    /// Always `None`: this range type has no `index` field.
    fn index(&self) -> Option<usize> {
        None
    }
}
impl GenericRangeOperations for RangeEmpty {
    /// Builds the regions flanking this range, clipped to the sequence.
    ///
    /// * `left_flank` - if `Some(w)`, a region from `start - w` (saturating at
    ///   the lower bound of `Position`) up to `min(start, seqlen)`.
    /// * `right_flank` - if `Some(w)`, a region from `end` up to
    ///   `min(end + w, seqlen)`.
    /// * `seqlen` - upper bound applied to the end of both flanks.
    ///
    /// Flanks that would have zero width are skipped. The left flank, when
    /// present, precedes the right flank in the returned vector. The type
    /// parameter `R` is not used by this implementation.
    fn flanking_ranges<R: GenericRange>(
        &self,
        left_flank: Option<Position>,
        right_flank: Option<Position>,
        seqlen: Position,
    ) -> Vec<Self> {
        // At most two flanks are ever produced.
        let mut flanking = Vec::with_capacity(2);
        if let Some(left) = left_flank {
            // `max(.., 0)` is a no-op for an unsigned `Position`; it is kept so
            // the clamp still holds should `Position` be a signed type.
            let flank_start = std::cmp::max(self.start.saturating_sub(left), 0);
            let flank_end = std::cmp::min(self.start, seqlen);
            if flank_end > flank_start {
                flanking.push(RangeEmpty::new(flank_start, flank_end));
            }
        }
        if let Some(right) = right_flank {
            let flank_start = std::cmp::max(self.end, 0);
            // Saturate rather than overflow: `end + right` may exceed the
            // maximum `Position`, which would panic in debug builds and wrap
            // (silently dropping the flank) in release builds.
            let flank_end = std::cmp::min(self.end.saturating_add(right), seqlen);
            if flank_end > flank_start {
                flanking.push(RangeEmpty::new(flank_start, flank_end));
            }
        }
        flanking
    }
}
impl AdjustableGenericRange for RangeEmpty {
    /// Overwrites the start position; no ordering check against `end` is made.
    fn set_start(&mut self, start: Position) {
        self.start = start;
    }

    /// Overwrites the end position; no ordering check against `start` is made.
    fn set_end(&mut self, end: Position) {
        self.end = end;
    }
}
impl From<RangeIndexed> for RangeEmpty {
fn from(value: RangeIndexed) -> Self {
RangeEmpty {
start: value.start,
end: value.end,
}
}
}
/// A range with `start` and `end` positions plus a `usize` index.
///
/// The `GenericRange::index` implementation for this type in this file
/// returns `Some(index)`.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct RangeIndexed {
/// Start position of the range.
pub start: Position,
/// End position of the range; `RangeIndexed::new` asserts `end > start`.
pub end: Position,
/// Index associated with this range (presumably into a data container — TODO confirm).
pub index: usize,
}
// SAFETY: NOTE(review) — `RangeIndexed` holds only two `Position` fields and a
// `usize`. Assuming `Position` is a plain integer type, sharing or sending the
// struct across threads is sound, and these manual impls are likely redundant
// with the automatically derived `Send`/`Sync` — TODO confirm against `Position`.
unsafe impl Sync for RangeIndexed {}
unsafe impl Send for RangeIndexed {}
impl RangeIndexed {
pub fn new(start: Position, end: Position, index: usize) -> Self {
assert!(end > start, "{}-{}", start, end);
Self { start, end, index }
}
}
impl GenericRange for RangeIndexed {
    /// Returns the stored start position.
    fn start(&self) -> Position {
        self.start
    }

    /// Returns the stored end position.
    fn end(&self) -> Position {
        self.end
    }

    /// Returns the stored index, always wrapped in `Some`.
    fn index(&self) -> Option<usize> {
        Some(self.index)
    }
}
impl GenericRangeOperations for RangeIndexed {
    /// Builds the regions flanking this range, clipped to the sequence.
    ///
    /// * `left_flank` - if `Some(w)`, a region from `start - w` (saturating at
    ///   the lower bound of `Position`) up to `min(start, seqlen)`.
    /// * `right_flank` - if `Some(w)`, a region from `end` up to
    ///   `min(end + w, seqlen)`.
    /// * `seqlen` - upper bound applied to the end of both flanks.
    ///
    /// Every flank carries this range's `index`. Flanks that would have zero
    /// width are skipped. The left flank, when present, precedes the right
    /// flank. The type parameter `R` is not used by this implementation.
    fn flanking_ranges<R: GenericRange>(
        &self,
        left_flank: Option<Position>,
        right_flank: Option<Position>,
        seqlen: Position,
    ) -> Vec<Self> {
        // At most two flanks are ever produced.
        let mut flanking = Vec::with_capacity(2);
        if let Some(left) = left_flank {
            // `max(.., 0)` is a no-op for an unsigned `Position`; it is kept so
            // the clamp still holds should `Position` be a signed type.
            let flank_start = std::cmp::max(self.start.saturating_sub(left), 0);
            let flank_end = std::cmp::min(self.start, seqlen);
            if flank_end > flank_start {
                flanking.push(RangeIndexed::new(flank_start, flank_end, self.index));
            }
        }
        if let Some(right) = right_flank {
            let flank_start = std::cmp::max(self.end, 0);
            // Saturate rather than overflow: `end + right` may exceed the
            // maximum `Position`, which would panic in debug builds and wrap
            // (silently dropping the flank) in release builds.
            let flank_end = std::cmp::min(self.end.saturating_add(right), seqlen);
            if flank_end > flank_start {
                flanking.push(RangeIndexed::new(flank_start, flank_end, self.index));
            }
        }
        flanking
    }
}
impl AdjustableGenericRange for RangeIndexed {
    /// Overwrites the start position; no ordering check against `end` is made.
    fn set_start(&mut self, start: Position) {
        self.start = start;
    }

    /// Overwrites the end position; no ordering check against `start` is made.
    fn set_end(&mut self, end: Position) {
        self.end = end;
    }
}
/// A range record that owns its sequence name and a data payload of type `U`.
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
pub struct GenomicRangeRecord<U> {
/// Name of the sequence this range lies on.
pub seqname: String,
/// Start position of the range.
pub start: Position,
/// End position of the range; `GenomicRangeRecord::new` asserts `end > start`.
pub end: Position,
/// Payload carried alongside the range.
pub data: U,
}
impl<U> GenomicRangeRecord<U> {
    /// Creates a record for `seqname` spanning `start` to `end` with `data`.
    ///
    /// # Panics
    ///
    /// Panics if `end` is not strictly greater than `start`.
    pub fn new(seqname: String, start: Position, end: Position, data: U) -> Self {
        assert!(end > start);
        Self {
            seqname,
            start,
            end,
            data,
        }
    }

    /// Consumes the record and returns one with the same coordinates whose
    /// data is `func(data)`.
    ///
    /// `func` is invoked exactly once, so any `FnOnce` is accepted; closures
    /// and functions implementing `Fn` or `FnMut` keep working unchanged.
    pub fn into_map_data<F, V>(self, func: F) -> GenomicRangeRecord<V>
    where
        F: FnOnce(U) -> V,
    {
        let GenomicRangeRecord {
            seqname,
            start,
            end,
            data,
        } = self;
        GenomicRangeRecord {
            seqname,
            start,
            end,
            data: func(data),
        }
    }

    /// Consumes the record and returns its data-less counterpart, dropping
    /// the payload.
    pub fn into_empty(self) -> GenomicRangeRecordEmpty {
        GenomicRangeRecordEmpty {
            seqname: self.seqname,
            start: self.start,
            end: self.end,
        }
    }
}
impl<U: Clone> GenericRange for GenomicRangeRecord<U> {
    /// Returns the stored start position.
    fn start(&self) -> Position {
        self.start
    }

    /// Returns the stored end position.
    fn end(&self) -> Position {
        self.end
    }

    /// Always `None`: the record owns its data rather than indexing into a
    /// container.
    fn index(&self) -> Option<usize> {
        None
    }
}
impl<U: Clone> AdjustableGenericRange for GenomicRangeRecord<U> {
    /// Overwrites the start position; no ordering check against `end` is made.
    fn set_start(&mut self, start: Position) {
        self.start = start;
    }

    /// Overwrites the end position; no ordering check against `start` is made.
    fn set_end(&mut self, end: Position) {
        self.end = end;
    }
}
impl<U: Clone> GenericRangeOperations for GenomicRangeRecord<U> {
    /// Builds the regions flanking this record, clipped to the sequence.
    ///
    /// * `left_flank` - if `Some(w)`, a region from `start - w` (saturating at
    ///   the lower bound of `Position`) up to `min(start, seqlen)`.
    /// * `right_flank` - if `Some(w)`, a region from `end` up to
    ///   `min(end + w, seqlen)`.
    /// * `seqlen` - upper bound applied to the end of both flanks.
    ///
    /// Every flank receives a clone of this record's `seqname` and `data`.
    /// Flanks that would have zero width are skipped. The left flank, when
    /// present, precedes the right flank. The type parameter `R` is not used
    /// by this implementation.
    fn flanking_ranges<R: GenericRange>(
        &self,
        left_flank: Option<Position>,
        right_flank: Option<Position>,
        seqlen: Position,
    ) -> Vec<Self> {
        // At most two flanks are ever produced.
        let mut flanking = Vec::with_capacity(2);
        if let Some(left) = left_flank {
            // `max(.., 0)` is a no-op for an unsigned `Position`; it is kept so
            // the clamp still holds should `Position` be a signed type.
            let flank_start = std::cmp::max(self.start.saturating_sub(left), 0);
            let flank_end = std::cmp::min(self.start, seqlen);
            if flank_end > flank_start {
                flanking.push(GenomicRangeRecord::new(
                    self.seqname.clone(),
                    flank_start,
                    flank_end,
                    self.data.clone(),
                ));
            }
        }
        if let Some(right) = right_flank {
            let flank_start = std::cmp::max(self.end, 0);
            // Saturate rather than overflow: `end + right` may exceed the
            // maximum `Position`, which would panic in debug builds and wrap
            // (silently dropping the flank) in release builds.
            let flank_end = std::cmp::min(self.end.saturating_add(right), seqlen);
            if flank_end > flank_start {
                flanking.push(GenomicRangeRecord::new(
                    self.seqname.clone(),
                    flank_start,
                    flank_end,
                    self.data.clone(),
                ));
            }
        }
        flanking
    }
}
/// A range record that owns its sequence name but carries no data.
///
/// `#[serde(deny_unknown_fields)]` makes deserialization fail when the input
/// contains fields other than the three declared here.
#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct GenomicRangeRecordEmpty {
/// Name of the sequence this range lies on.
pub seqname: String,
/// Start position of the range.
pub start: Position,
/// End position of the range; `GenomicRangeRecordEmpty::new` asserts `end > start`.
pub end: Position,
}
impl GenomicRangeRecordEmpty {
pub fn new(seqname: String, start: Position, end: Position) -> Self {
assert!(end > start);
Self {
seqname,
start,
end,
}
}
}
impl GenericRange for GenomicRangeRecordEmpty {
    /// Returns the stored start position.
    fn start(&self) -> Position {
        self.start
    }

    /// Returns the stored end position.
    fn end(&self) -> Position {
        self.end
    }

    /// Always `None`: this record type has no `index` field.
    fn index(&self) -> Option<usize> {
        None
    }
}
impl AdjustableGenericRange for GenomicRangeRecordEmpty {
    /// Overwrites the start position; no ordering check against `end` is made.
    fn set_start(&mut self, start: Position) {
        self.start = start;
    }

    /// Overwrites the end position; no ordering check against `start` is made.
    fn set_end(&mut self, end: Position) {
        self.end = end;
    }
}
impl GenericRangeOperations for GenomicRangeRecordEmpty {
    /// Builds the regions flanking this record, clipped to the sequence.
    ///
    /// * `left_flank` - if `Some(w)`, a region from `start - w` (saturating at
    ///   the lower bound of `Position`) up to `min(start, seqlen)`.
    /// * `right_flank` - if `Some(w)`, a region from `end` up to
    ///   `min(end + w, seqlen)`.
    /// * `seqlen` - upper bound applied to the end of both flanks.
    ///
    /// Every flank receives a clone of this record's `seqname`. Flanks that
    /// would have zero width are skipped. The left flank, when present,
    /// precedes the right flank. The type parameter `R` is not used by this
    /// implementation.
    fn flanking_ranges<R: GenericRange>(
        &self,
        left_flank: Option<Position>,
        right_flank: Option<Position>,
        seqlen: Position,
    ) -> Vec<Self> {
        // At most two flanks are ever produced.
        let mut flanking = Vec::with_capacity(2);
        if let Some(left) = left_flank {
            // `max(.., 0)` is a no-op for an unsigned `Position`; it is kept so
            // the clamp still holds should `Position` be a signed type.
            let flank_start = std::cmp::max(self.start.saturating_sub(left), 0);
            let flank_end = std::cmp::min(self.start, seqlen);
            if flank_end > flank_start {
                flanking.push(GenomicRangeRecordEmpty::new(
                    self.seqname.clone(),
                    flank_start,
                    flank_end,
                ));
            }
        }
        if let Some(right) = right_flank {
            let flank_start = std::cmp::max(self.end, 0);
            // Saturate rather than overflow: `end + right` may exceed the
            // maximum `Position`, which would panic in debug builds and wrap
            // (silently dropping the flank) in release builds.
            let flank_end = std::cmp::min(self.end.saturating_add(right), seqlen);
            if flank_end > flank_start {
                flanking.push(GenomicRangeRecordEmpty::new(
                    self.seqname.clone(),
                    flank_start,
                    flank_end,
                ));
            }
        }
        flanking
    }
}
/// A range record that refers to its sequence name and data by index instead
/// of owning them.
#[derive(Debug, Clone, PartialEq)]
pub struct GenomicRangeIndexedRecord {
/// Position of the sequence name within the `seqnames` slice passed to
/// `seqname`, `to_record` and `to_record_empty`.
pub seqname_index: usize,
/// Start position of the range.
pub start: Position,
/// End position of the range; `GenomicRangeIndexedRecord::new` asserts `end > start`.
pub end: Position,
/// Optional index handed to `IndexedDataContainer::get_value` by `to_record`.
pub index: Option<usize>,
}
impl GenomicRangeIndexedRecord {
pub fn new(seqname_index: usize, start: Position, end: Position, index: Option<usize>) -> Self {
assert!(end > start);
Self {
seqname_index,
start,
end,
index,
}
}
pub fn seqname(&self, seqnames: &[String]) -> String {
seqnames[self.seqname_index].clone()
}
pub fn to_record<'a, T>(
self,
seqnames: &[String],
data: &'a T,
) -> GenomicRangeRecord<<T as IndexedDataContainer>::Item<'a>>
where
T: IndexedDataContainer,
{
let data = data.get_value(self.index().unwrap());
GenomicRangeRecord {
seqname: seqnames[self.seqname_index].clone(),
start: self.start,
end: self.end,
data,
}
}
pub fn to_record_empty<T>(self, seqnames: &[String]) -> GenomicRangeRecordEmpty {
GenomicRangeRecordEmpty {
seqname: seqnames[self.seqname_index].clone(),
start: self.start,
end: self.end,
}
}
}
impl GenericRange for GenomicRangeIndexedRecord {
    /// Returns the stored start position.
    fn start(&self) -> Position {
        self.start
    }

    /// Returns the stored end position.
    fn end(&self) -> Position {
        self.end
    }

    /// Returns the stored optional index unchanged.
    fn index(&self) -> Option<usize> {
        self.index
    }
}
impl AdjustableGenericRange for GenomicRangeIndexedRecord {
    /// Overwrites the start position; no ordering check against `end` is made.
    fn set_start(&mut self, start: Position) {
        self.start = start;
    }

    /// Overwrites the end position; no ordering check against `start` is made.
    fn set_end(&mut self, end: Position) {
        self.end = end;
    }
}
/// Checks a `start`/`end` pair against a sequence of the given `length`.
///
/// # Errors
///
/// * [`GRangesError::InvalidGenomicRange`] if `start > end` (equal bounds are
///   accepted here).
/// * [`GRangesError::InvalidGenomicRangeForSequence`] if `end >= length`.
///
/// The ordering check is performed first.
pub fn validate_range(
    start: Position,
    end: Position,
    length: Position,
) -> Result<(), GRangesError> {
    if start > end {
        Err(GRangesError::InvalidGenomicRange(start, end))
    } else if end >= length {
        Err(GRangesError::InvalidGenomicRangeForSequence(
            start, end, length,
        ))
    } else {
        Ok(())
    }
}
/// Converts a `start`/`end` pair into a `Range<usize>` after checking it
/// against a sequence of the given `length`.
///
/// # Errors
///
/// * [`GRangesError::InvalidGenomicRange`] if `start >= end`.
/// * [`GRangesError::InvalidGenomicRangeForSequence`] if `end > length`.
///
/// The ordering check is performed first.
///
/// # Panics
///
/// Panics if either bound cannot be converted into a `usize`.
pub fn try_range(
    start: Position,
    end: Position,
    length: Position,
) -> Result<Range<usize>, GRangesError> {
    if start >= end {
        return Err(GRangesError::InvalidGenomicRange(start, end));
    }
    if end > length {
        return Err(GRangesError::InvalidGenomicRangeForSequence(
            start, end, length,
        ));
    }
    let bounds: Range<usize> = Range {
        start: start.try_into().unwrap(),
        end: end.try_into().unwrap(),
    };
    Ok(bounds)
}
// Unit tests for `validate_range` and for the range operations exercised
// through `RangeEmpty` (`overlap_range`, `width`, `overlap_width`,
// `distance_or_overlap`). Those operations are not defined in this file;
// presumably they come from the traits re-exported by `crate::prelude`.
#[cfg(test)]
mod tests {
use super::{validate_range, RangeEmpty};
use crate::prelude::*;
// A start greater than the end is reported as `InvalidGenomicRange`.
#[test]
fn test_invalid_range_start_end() {
let result = validate_range(5, 1, 10);
assert!(matches!(
result,
Err(GRangesError::InvalidGenomicRange(5, 1))
));
}
// An end strictly below the sequence length is accepted.
#[test]
fn test_valid_range_length() {
let result = validate_range(1, 10, 11);
assert!(result.is_ok());
}
// An end equal to the sequence length is rejected by `validate_range`.
#[test]
fn test_invalid_range_length() {
let result = validate_range(1, 10, 10);
assert!(matches!(
result,
Err(GRangesError::InvalidGenomicRangeForSequence(1, 10, 10))
));
}
// Overlap of 5..8 and 4..6 is expected to be (5, 6).
#[test]
fn test_overlap_range() {
let range_a = RangeEmpty::new(5, 8);
let range_b = RangeEmpty::new(4, 6);
assert_eq!(range_a.overlap_range(&range_b), Some((5, 6)));
}
// Width is expected to be `end - start`.
#[test]
fn test_width() {
let range_a = RangeEmpty::new(5, 8);
assert_eq!(range_a.width(), 3);
}
#[test]
fn test_overlap_width() {
// Disjoint ranges: no overlap.
let range_a = RangeEmpty::new(0, 2);
let range_b = RangeEmpty::new(4, 6);
assert_eq!(range_a.overlap_width(&range_b), 0);
// Adjacent ranges (end of one equals start of the other): no overlap.
let range_a = RangeEmpty::new(0, 2);
let range_b = RangeEmpty::new(2, 6);
assert_eq!(range_a.overlap_width(&range_b), 0);
// Partial overlap of one position.
let range_a = RangeEmpty::new(1, 3);
let range_b = RangeEmpty::new(2, 5);
assert_eq!(range_a.overlap_width(&range_b), 1);
// `range_b` fully contained in `range_a`: overlap equals `range_b`'s width.
let range_a = RangeEmpty::new(1, 10);
let range_b = RangeEmpty::new(2, 5);
assert_eq!(range_a.overlap_width(&range_b), 3);
}
#[test]
fn test_distance_or_overlap() {
// Disjoint ranges: positive gap between them.
let range_a = RangeEmpty::new(0, 2);
let range_b = RangeEmpty::new(4, 6);
assert_eq!(range_a.distance_or_overlap(&range_b), 2);
// Adjacent ranges: zero.
let range_a = RangeEmpty::new(0, 2);
let range_b = RangeEmpty::new(2, 6);
assert_eq!(range_a.distance_or_overlap(&range_b), 0);
// Overlapping ranges: the overlap width is reported as a negative number.
let range_a = RangeEmpty::new(1, 3);
let range_b = RangeEmpty::new(2, 5);
assert_eq!(range_a.distance_or_overlap(&range_b), -1);
let range_a = RangeEmpty::new(1, 10);
let range_b = RangeEmpty::new(2, 5);
assert_eq!(range_a.distance_or_overlap(&range_b), -3);
}
}