use std::path::PathBuf;
use indexmap::IndexMap;
use crate::{
data::DatumType,
error::GRangesError,
granges::GRanges,
io::{
parsers::{FilteredRanges, UnwrappedRanges},
tsv::TsvConfig,
},
join::LeftGroupedJoin,
prelude::VecRangesIndexed,
ranges::GenomicRangeRecord,
Position, PositionOffset,
};
/// Conversion trait for types that can hand out a borrowed [`GRanges`] with
/// range container `C` and data container `T`.
///
/// The lifetime `'a` ties the returned reference to the borrow of `self`.
pub trait AsGRangesRef<'a, C, T> {
    /// Borrows `self` for `'a` and returns a [`GRanges`] reference valid for
    /// that same lifetime.
    fn as_granges_ref(&'a self) -> &'a GRanges<C, T>;
}
/// A left-overlap operation between `self` and a borrowed right-hand operand
/// of type `Right`.
///
/// NOTE(review): the exact join semantics are decided by each implementor;
/// nothing in this trait definition fixes them.
pub trait LeftOverlaps<'a, Right> {
    /// The value produced when the overlap operation succeeds.
    type Output;

    /// Consumes `self` and computes its overlaps against `right`.
    ///
    /// # Errors
    ///
    /// Returns a [`GRangesError`] if the implementor's overlap computation
    /// fails.
    fn left_overlaps(self, right: &'a Right) -> Result<Self::Output, GRangesError>;
}
/// Serialization of genomic ranges to TSV.
///
/// `C` is the [`RangeContainer`] type of the ranges being written; it does not
/// appear in the method signature and only selects the implementation.
pub trait GenomicRangesTsvSerialize<'a, C: RangeContainer> {
    /// Writes the ranges as TSV, formatted according to `config`.
    ///
    /// * `output` - optional destination path. NOTE(review): the behavior for
    ///   `None` is implementor-defined (presumably standard output) — confirm
    ///   against the implementations.
    /// * `config` - TSV formatting options.
    ///
    /// # Errors
    ///
    /// Returns a [`GRangesError`] if the implementor fails to write.
    fn write_to_tsv(
        &'a self,
        output: Option<impl Into<PathBuf>>,
        config: &TsvConfig,
    ) -> Result<(), GRangesError>;
}
/// Core behavior shared by every range type: a start, an end, and an optional
/// index, plus arithmetic derived from them.
///
/// The provided methods treat `end` as exclusive: the width is `end - start`,
/// and two ranges that merely touch (`a.end() == b.start()`) do not overlap.
pub trait GenericRange: Clone {
    /// Start position of the range.
    fn start(&self) -> Position;

    /// End position of the range.
    fn end(&self) -> Position;

    /// Optional index carried by the range.
    ///
    /// NOTE(review): presumably an index into an associated data container —
    /// confirm against the implementors.
    fn index(&self) -> Option<usize>;

    /// Width of the range, `end - start`.
    ///
    /// NOTE(review): assumes `start <= end`; with an unsigned `Position` an
    /// inverted range underflows here.
    fn width(&self) -> Position {
        self.end() - self.start()
    }

    /// Midpoint of the range, `(start + end) / 2` rounded down.
    ///
    /// Computed as `lo + (hi - lo) / 2` so that the intermediate sum cannot
    /// overflow `Position` for large coordinates; the result is identical to
    /// the naive formula whenever that formula does not overflow.
    fn midpoint(&self) -> Position {
        let (start, end) = (self.start(), self.end());
        // Order the endpoints so the subtraction below can never underflow,
        // even for an inverted range.
        let (lo, hi) = if start <= end {
            (start, end)
        } else {
            (end, start)
        };
        lo + (hi - lo) / 2
    }

    /// Number of positions shared by this range and `other`; `0` when the
    /// ranges are disjoint or only touch.
    fn overlap_width<R: GenericRange>(&self, other: &R) -> Position {
        let overlap_start = self.start().max(other.start());
        let overlap_end = self.end().min(other.end());
        if overlap_start < overlap_end {
            // The guard guarantees this subtraction cannot underflow.
            overlap_end - overlap_start
        } else {
            0
        }
    }

    /// Returns `true` when this range shares at least one position with
    /// `other`.
    fn has_overlap_with<R: GenericRange>(&self, other: &R) -> bool {
        self.overlap_width(other) > 0
    }

    /// Signed separation between this range and `other`:
    ///
    /// * negative overlap width when the ranges overlap,
    /// * `0` when they are bookends (one ends exactly where the other starts),
    /// * the size of the gap between them otherwise.
    ///
    /// # Panics
    ///
    /// Panics if the overlap width cannot be represented as a
    /// `PositionOffset`.
    ///
    /// NOTE(review): the gap is converted with `as`, so a gap too large for
    /// `PositionOffset` would wrap rather than panic — confirm this is
    /// acceptable for the coordinate ranges in use.
    fn distance_or_overlap<R: GenericRange>(&self, other: &R) -> PositionOffset {
        if self.end() > other.start() && self.start() < other.end() {
            let overlap: PositionOffset = self
                .overlap_width(other)
                .try_into()
                .expect("overlap width does not fit in PositionOffset");
            -overlap
        } else if self.end() == other.start() || self.start() == other.end() {
            0
        } else {
            // Exactly one of the two saturating differences is non-zero,
            // depending on which side `other` lies on.
            let gap_right = other.start().saturating_sub(self.end());
            let gap_left = self.start().saturating_sub(other.end());
            gap_right.max(gap_left) as PositionOffset
        }
    }

    /// The `(start, end)` of the region shared with `other`, or `None` when
    /// the ranges do not overlap.
    ///
    /// Ranges that only touch return `None`, consistent with
    /// [`GenericRange::overlap_width`] returning `0` and
    /// [`GenericRange::has_overlap_with`] returning `false` for them.
    fn overlap_range<R: GenericRange>(&self, other: &R) -> Option<(Position, Position)> {
        let overlap_start = self.start().max(other.start());
        let overlap_end = self.end().min(other.end());
        if overlap_start < overlap_end {
            Some((overlap_start, overlap_end))
        } else {
            None
        }
    }

    /// The range as a `(start, end, index)` tuple.
    fn as_tuple(&self) -> (Position, Position, Option<usize>) {
        (self.start(), self.end(), self.index())
    }
}
/// A [`GenericRange`] that additionally knows which sequence it lies on,
/// identified by a sequence-name index.
///
/// The overlap helpers here behave like their [`GenericRange`] counterparts
/// but report no overlap for ranges on different sequences.
pub trait GenericGenomicRange: GenericRange {
    /// Index of the sequence name this range belongs to.
    fn seqname_index(&self) -> usize;

    /// Number of positions shared by this range and `other`.
    ///
    /// Returns `0` when the ranges are on different sequences, are disjoint,
    /// or only touch.
    fn genomic_overlap_width<R: GenericGenomicRange>(&self, other: &R) -> Position {
        if self.seqname_index() != other.seqname_index() {
            return 0;
        }
        let overlap_start = self.start().max(other.start());
        let overlap_end = self.end().min(other.end());
        if overlap_start < overlap_end {
            // The guard guarantees this subtraction cannot underflow.
            overlap_end - overlap_start
        } else {
            0
        }
    }

    /// The `(start, end)` of the region shared with `other`, or `None` when
    /// the ranges are on different sequences or do not overlap.
    ///
    /// Ranges that only touch return `None`, so that `Some(_)` is returned
    /// exactly when [`GenericGenomicRange::genomic_overlap_width`] is
    /// non-zero.
    fn genomic_overlap_range<R: GenericGenomicRange>(
        &self,
        other: &R,
    ) -> Option<(Position, Position)> {
        if self.seqname_index() != other.seqname_index() {
            return None;
        }
        let overlap_start = self.start().max(other.start());
        let overlap_end = self.end().min(other.end());
        if overlap_start < overlap_end {
            Some((overlap_start, overlap_end))
        } else {
            None
        }
    }
}
/// Access to the pieces of a join result: the join record itself plus the
/// data attached to its left and right sides.
///
/// `DL` and `DR` are the left and right data container types; they do not
/// appear in the method signatures and only select the implementation.
pub trait JoinDataOperations<DL, DR> {
    /// Element type of the left-hand data.
    type LeftDataElementType;

    /// Element type of the right-hand data.
    type RightDataElementType;

    /// Returns the [`LeftGroupedJoin`] describing this join.
    fn join(&self) -> &LeftGroupedJoin;

    /// Returns the left-hand data element, if there is one.
    fn left_data(&self) -> Option<&Self::LeftDataElementType>;

    /// Returns the right-hand data elements, if there are any.
    fn right_data(&self) -> Option<&Vec<Self::RightDataElementType>>;
}
/// Conversion of a value into a [`DatumType`].
pub trait IntoDatumType {
    /// Consumes `self` and returns its [`DatumType`] representation.
    fn into_data_type(self) -> DatumType;
}
/// Lookup of [`DatumType`] values by name.
pub trait Selection {
    /// Returns the value selected by `name`.
    fn select_by_name(&self, name: &str) -> DatumType;

    /// Returns one value per entry of `names`, in the same order, by calling
    /// [`Selection::select_by_name`] for each entry.
    fn select(&self, names: &[String]) -> Vec<DatumType> {
        let mut selected = Vec::with_capacity(names.len());
        for name in names {
            selected.push(self.select_by_name(name));
        }
        selected
    }
}
/// Operations that derive new ranges from an existing [`GenericRange`].
pub trait GenericRangeOperations: GenericRange {
    /// Builds the flanking ranges of `self`.
    ///
    /// * `left_flank` - optional size of the flank on the left side.
    /// * `right_flank` - optional size of the flank on the right side.
    /// * `seqlen` - length of the sequence the range lies on.
    ///   NOTE(review): presumably used to keep the flanks within the
    ///   sequence — confirm against the implementations.
    ///
    /// The type parameter `R` does not appear in the arguments or the return
    /// type, so callers have to name it explicitly.
    fn flanking_ranges<R: GenericRange>(
        &self,
        left_flank: Option<Position>,
        right_flank: Option<Position>,
        seqlen: Position,
    ) -> Vec<Self>;
}
/// A [`GenericRange`] whose endpoints can be modified in place.
pub trait AdjustableGenericRange: GenericRange {
    /// Sets the start position of the range to `start`.
    fn set_start(&mut self, start: Position);

    /// Sets the end position of the range to `end`.
    fn set_end(&mut self, end: Position);
}
/// A container of ranges that all belong to a single sequence.
pub trait RangeContainer {
    /// The range type stored inside the container.
    type InternalRangeType;

    /// Number of ranges in the container.
    fn len(&self) -> usize;

    /// Returns `true` when the container holds no ranges.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Length of the sequence the container's ranges lie on.
    fn sequence_length(&self) -> Position;
}
/// Marker trait for types that can act as a data container.
///
/// It declares no methods; it is the supertrait that [`IndexedDataContainer`]
/// builds on.
pub trait DataContainer {}
/// Sequence-name filtering for iterators that yield fallible range records
/// (`Result<R, GRangesError>`).
pub trait GeneralRangeRecordIterator<R: GenericRange>:
    Iterator<Item = Result<R, GRangesError>> + Sized
{
    /// Wraps the iterator in a [`FilteredRanges`] configured with the
    /// sequence names to retain.
    fn retain_seqnames(self, seqnames: &[String]) -> FilteredRanges<Self, R>;

    /// Wraps the iterator in a [`FilteredRanges`] configured with the
    /// sequence names to exclude.
    fn exclude_seqnames(self, seqnames: &[String]) -> FilteredRanges<Self, R>;
}
/// Adapter for iterators over genomic range records whose data is an
/// `Option<String>`.
pub trait GenomicRangeRecordUnwrappable:
    Iterator<Item = Result<GenomicRangeRecord<Option<String>>, GRangesError>> + Sized
{
    /// Wraps the iterator in an [`UnwrappedRanges`] adapter.
    ///
    /// NOTE(review): presumably the adapter unwraps each record's optional
    /// data and reports missing data as an error — confirm against
    /// `UnwrappedRanges`.
    fn try_unwrap_data(self) -> UnwrappedRanges<Self>;
}
/// A [`RangeContainer`] whose ranges can be iterated over by value.
pub trait IterableRangeContainer: RangeContainer
where
    <Self as IterableRangeContainer>::RangeType: GenericRange,
{
    /// The range type yielded by [`IterableRangeContainer::iter_ranges`].
    type RangeType: GenericRange;

    /// Returns a boxed iterator over the container's ranges; the iterator
    /// borrows the container.
    fn iter_ranges(&self) -> Box<dyn Iterator<Item = Self::RangeType> + '_>;
}
/// Conversion of a container into an owning iterator over ranges of type `R`.
pub trait IntoIterableRangesContainer<R> {
    /// Consumes the container and returns a boxed iterator over its ranges.
    fn into_iter_ranges(self) -> Box<dyn Iterator<Item = R>>;
}
/// A [`DataContainer`] whose elements are addressed by a `usize` index.
pub trait IndexedDataContainer: DataContainer {
    /// Borrowed view of an element; it may borrow from the container for
    /// `'a`.
    type Item<'a>
    where
        Self: 'a;

    /// Owned form of an element.
    type OwnedItem;

    /// Returns `true` when `index` refers to an element of the container.
    fn is_valid_index(&self, index: usize) -> bool;

    /// Returns the borrowed view of the element at `index`.
    fn get_value(&self, index: usize) -> Self::Item<'_>;

    /// Returns the owned form of the element at `index`.
    fn get_owned(&self, index: usize) -> Self::OwnedItem;

    /// Number of elements in the container.
    fn len(&self) -> usize;

    /// Returns `true` when the container holds no elements.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the entries of `indices` that fail
    /// [`IndexedDataContainer::is_valid_index`], preserving their order and
    /// any duplicates.
    fn invalid_indices(&self, indices: &[usize]) -> Vec<usize> {
        indices
            .iter()
            .copied()
            .filter(|&index| !self.is_valid_index(index))
            .collect()
    }
}
/// Access to a set of named sequences and to computations over regions of
/// them.
pub trait Sequences {
    /// Type returned when a whole sequence is fetched; it may borrow from
    /// `self` for `'a`.
    type Container<'a>: 'a
    where
        Self: 'a;

    /// Type handed to the closures that are mapped over a region.
    ///
    /// NOTE(review): presumably a view of one region of a sequence — confirm
    /// against the implementors.
    type Slice<'a>;

    /// Names of all sequences.
    fn seqnames(&self) -> Vec<String>;

    /// Fetches the sequence called `seqname`.
    ///
    /// # Errors
    ///
    /// Returns a [`GRangesError`] if the implementor cannot provide the
    /// sequence.
    fn get_sequence(&self, seqname: &str) -> Result<Self::Container<'_>, GRangesError>;

    /// Length of the sequence called `seqname`.
    ///
    /// # Errors
    ///
    /// Returns a [`GRangesError`] if the implementor cannot determine the
    /// length.
    fn get_sequence_length(&self, seqname: &str) -> Result<Position, GRangesError>;

    /// Applies `func` to the region of sequence `seqname` delimited by
    /// `start` and `end`, and returns the value it produces.
    ///
    /// # Errors
    ///
    /// Returns a [`GRangesError`] if the implementor cannot evaluate the
    /// region.
    fn region_map<V, F>(
        &self,
        func: &F,
        seqname: &str,
        start: Position,
        end: Position,
    ) -> Result<V, GRangesError>
    where
        F: Fn(<Self as Sequences>::Slice<'_>) -> V;

    /// Builds a map from every sequence name (in [`Sequences::seqnames`]
    /// order) to its length.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first error reported by
    /// [`Sequences::get_sequence_length`].
    fn seqlens(&self) -> Result<IndexMap<String, Position>, GRangesError> {
        self.seqnames()
            .into_iter()
            .map(|seqname| {
                self.get_sequence_length(&seqname)
                    .map(|seqlen| (seqname, seqlen))
            })
            .collect()
    }

    /// Evaluates `func` over the region of every range in `granges` and
    /// collects the results into a new [`GRanges`].
    ///
    /// The new object is created from the sequence lengths of `granges`.
    /// For each range, in the iteration order of `granges`, `func` is
    /// evaluated through [`Sequences::region_map`] on the range's
    /// `start..end` and the result is pushed as that range's data.
    ///
    /// # Errors
    ///
    /// Returns the first [`GRangesError`] raised by
    /// [`Sequences::region_map`] or by pushing a range into the result.
    fn region_map_into_granges<'b, C, F, V, T: 'b>(
        &self,
        granges: &'b impl AsGRangesRef<'b, C, T>,
        func: &F,
    ) -> Result<GRanges<VecRangesIndexed, Vec<V>>, GRangesError>
    where
        V: Clone,
        C: IterableRangeContainer + 'b,
        F: Fn(<Self as Sequences>::Slice<'_>) -> V,
    {
        let granges_ref = granges.as_granges_ref();
        // NOTE(review): if `seqlens()` already returns an owned map, this
        // clone is redundant — confirm against `GRanges::seqlens`.
        let seqlens = &granges_ref.seqlens().clone();
        let mut gr: GRanges<VecRangesIndexed, Vec<V>> = GRanges::new_vec(seqlens);
        for (seqname, ranges) in granges_ref.ranges.iter() {
            for range in ranges.iter_ranges() {
                let (start, end) = (range.start(), range.end());
                // `func` is already a `&F`, so it is forwarded as-is, and the
                // computed value is moved into the result rather than cloned.
                let value = self.region_map(func, seqname, start, end)?;
                gr.push_range(seqname, start, end, value)?;
            }
        }
        Ok(gr)
    }
}