use std::{cell::Ref, path::Path};
use genomap::GenomeMap;
use indexmap::IndexMap;
#[cfg(feature = "ndarray")]
use ndarray::{s, Array1, Array2, ArrayView1, ArrayView2};
use std::path::PathBuf;
use super::lazy::LazyLoader;
use crate::error::GRangesError;
use crate::ranges::try_range;
use crate::traits::Sequences;
use crate::Position;
/// A set of one-dimensional numeric sequences: one `Array1<T>` per sequence
/// name, held in a `GenomeMap`.
pub struct NumericSequences1<T> {
/// Per-sequence 1-D arrays, looked up by sequence name.
data: GenomeMap<Array1<T>>,
}
impl<T> NumericSequences1<T> {
pub fn new(data: GenomeMap<Array1<T>>) -> Self {
Self { data }
}
}
/// [`Sequences`] implementation backed by in-memory one-dimensional arrays.
impl<T> Sequences for NumericSequences1<T>
where
    T: Copy + Default + Sized + 'static,
{
    /// A borrowed reference to a whole sequence's array.
    type Container<'a> = &'a Array1<T>;
    /// A read-only view over a sub-range of a sequence.
    type Slice<'a> = ArrayView1<'a, T>;

    /// Return the sequence names reported by the underlying `GenomeMap`.
    fn seqnames(&self) -> Vec<String> {
        self.data.names()
    }

    /// Borrow the array stored under `seqname`.
    ///
    /// # Errors
    ///
    /// Returns [`GRangesError::MissingSequence`] (carrying `seqname`) when no
    /// sequence with that name is stored.
    fn get_sequence(&self, seqname: &str) -> Result<Self::Container<'_>, GRangesError> {
        self.data
            .get(seqname)
            // Build the error lazily so the name is only copied on the failure path.
            .ok_or_else(|| GRangesError::MissingSequence(seqname.to_string()))
    }

    /// Apply `func` to a view of the region `start..end` of sequence `seqname`
    /// and return its result.
    ///
    /// The region is converted to a range by `try_range`, which is given the
    /// sequence's length (its number of elements) as the bound.
    ///
    /// # Errors
    ///
    /// Propagates the error from [`Sequences::get_sequence`] when the sequence
    /// is missing, and any error returned by `try_range`.
    ///
    /// # Panics
    ///
    /// Panics if the sequence length cannot be converted to the integer type
    /// expected by `try_range`.
    fn region_map<V, F>(
        &self,
        func: &F,
        seqname: &str,
        start: Position,
        end: Position,
    ) -> Result<V, GRangesError>
    where
        F: for<'b> Fn(Self::Slice<'b>) -> V,
    {
        let seq = self.get_sequence(seqname)?;
        let range = try_range(start, end, seq.len().try_into().unwrap())?;
        Ok(func(seq.slice(s![range])))
    }

    /// Return the number of elements in sequence `seqname`.
    ///
    /// # Errors
    ///
    /// Propagates the error from [`Sequences::get_sequence`] when the sequence
    /// is missing.
    ///
    /// # Panics
    ///
    /// Panics if the length does not fit in a [`Position`].
    fn get_sequence_length(&self, seqname: &str) -> Result<Position, GRangesError> {
        let seq = self.get_sequence(seqname)?;
        Ok(seq.len().try_into().unwrap())
    }
}
/// A set of two-dimensional numeric sequences: one `Array2<T>` per sequence
/// name, held in a `GenomeMap`.
pub struct NumericSequences2<T> {
/// Per-sequence 2-D arrays, looked up by sequence name.
data: GenomeMap<Array2<T>>,
}
impl<T> NumericSequences2<T> {
pub fn new(data: GenomeMap<Array2<T>>) -> Self {
Self { data }
}
}
/// [`Sequences`] implementation backed by in-memory two-dimensional arrays.
///
/// Regions are sliced along axis 0 (rows), so the length of a sequence is its
/// number of rows.
impl<T> Sequences for NumericSequences2<T>
where
    T: Copy + Default + Sized + 'static,
{
    /// A borrowed reference to a whole sequence's array.
    type Container<'a> = &'a Array2<T>;
    /// A read-only view over a range of rows of a sequence.
    type Slice<'a> = ArrayView2<'a, T>;

    /// Return the sequence names reported by the underlying `GenomeMap`.
    fn seqnames(&self) -> Vec<String> {
        self.data.names()
    }

    /// Borrow the array stored under `seqname`.
    ///
    /// # Errors
    ///
    /// Returns [`GRangesError::MissingSequence`] (carrying `seqname`) when no
    /// sequence with that name is stored.
    fn get_sequence(&self, seqname: &str) -> Result<Self::Container<'_>, GRangesError> {
        self.data
            .get(seqname)
            // Build the error lazily so the name is only copied on the failure path.
            .ok_or_else(|| GRangesError::MissingSequence(seqname.to_string()))
    }

    /// Apply `func` to a view of rows `start..end` (all columns) of sequence
    /// `seqname` and return its result.
    ///
    /// # Errors
    ///
    /// Propagates the error from [`Sequences::get_sequence`] when the sequence
    /// is missing, and any error returned by `try_range`.
    ///
    /// # Panics
    ///
    /// Panics if the row count cannot be converted to the integer type
    /// expected by `try_range`.
    fn region_map<V, F>(
        &self,
        func: &F,
        seqname: &str,
        start: Position,
        end: Position,
    ) -> Result<V, GRangesError>
    where
        F: for<'b> Fn(Self::Slice<'b>) -> V,
    {
        let seq = self.get_sequence(seqname)?;
        // The range indexes axis 0, so it must be validated against the number
        // of rows. `len()` on a 2-D array is rows * columns, which would let an
        // out-of-bounds range through to `slice`.
        let range = try_range(start, end, seq.nrows().try_into().unwrap())?;
        Ok(func(seq.slice(s![range, ..])))
    }

    /// Return the length of sequence `seqname`, i.e. its number of rows.
    ///
    /// # Errors
    ///
    /// Propagates the error from [`Sequences::get_sequence`] when the sequence
    /// is missing.
    ///
    /// # Panics
    ///
    /// Panics if the row count does not fit in a [`Position`].
    fn get_sequence_length(&self, seqname: &str) -> Result<Position, GRangesError> {
        let seq = self.get_sequence(seqname)?;
        // Rows, not total elements: see the note in `region_map`.
        Ok(seq.nrows().try_into().unwrap())
    }
}
/// Two-dimensional numeric sequences whose `Array2<T>` data is obtained
/// through a `LazyLoader` keyed by sequence name, alongside a table of
/// sequence lengths.
pub struct LazyNumericSequences2<T: std::fmt::Debug> {
/// Sequence name -> length; its keys are what `seqnames()` returns.
seqlens: IndexMap<String, Position>,
/// Loader producing the `Array2<T>` for a sequence name. The reader slot is
/// `Option<()>`.
lazy: LazyLoader<Option<()>, Array2<T>, String>,
}
impl<T: std::fmt::Debug> LazyNumericSequences2<T> {
    /// Create a [`LazyNumericSequences2`] whose per-sequence arrays are read
    /// from files under `dir`.
    ///
    /// # Arguments
    ///
    /// * `dir` - directory containing the per-sequence files.
    /// * `pattern` - filename template; every `{}` in it is replaced by the
    ///   sequence name.
    /// * `loader` - function that turns the resulting file path into an
    ///   `Array2<T>` (or a [`GRangesError`]).
    /// * `seqlens` - sequence name to length table, stored as-is.
    ///
    /// The loading closure is handed to `LazyLoader::new`; when it is invoked
    /// is determined by `LazyLoader`.
    pub fn new<F>(dir: &str, pattern: &str, loader: F, seqlens: IndexMap<String, Position>) -> Self
    where
        F: Fn(PathBuf) -> Result<Array2<T>, GRangesError> + 'static,
    {
        // Owned copies so the closure can outlive the borrowed arguments.
        let base_dir = Path::new(dir).to_path_buf();
        let template = pattern.to_string();

        // The reader slot is `None` and the closure ignores it.
        let lazy = LazyLoader::new(None, move |_reader, seqname: &String| {
            let filepath = base_dir.join(template.replace("{}", seqname));
            let array: Array2<T> = loader(filepath)?;
            Ok(array)
        });

        Self { seqlens, lazy }
    }
}
/// [`Sequences`] implementation whose two-dimensional arrays are obtained
/// through the `LazyLoader`.
///
/// Regions are sliced along axis 0 (rows), so the length of a sequence is its
/// number of rows.
impl<T> Sequences for LazyNumericSequences2<T>
where
    T: Copy + Default + Sized + 'static + std::fmt::Debug,
{
    /// A `Ref` guard over the array returned by the loader.
    type Container<'a> = Ref<'a, Array2<T>>;
    /// A read-only view over a range of rows of a sequence.
    type Slice<'a> = ArrayView2<'a, T>;

    /// Return the sequence names, taken from the keys of the sequence-length
    /// table in their stored order.
    fn seqnames(&self) -> Vec<String> {
        self.seqlens.keys().cloned().collect()
    }

    /// Obtain the array for `seqname` from the `LazyLoader`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `LazyLoader::get_data` reports.
    fn get_sequence(&self, seqname: &str) -> Result<Self::Container<'_>, GRangesError> {
        // The loader is keyed by `String`, so an owned key is needed here.
        self.lazy.get_data(&seqname.to_string())
    }

    /// Apply `func` to a view of rows `start..end` (all columns) of sequence
    /// `seqname` and return its result.
    ///
    /// # Errors
    ///
    /// Propagates the error from [`Sequences::get_sequence`] and any error
    /// returned by `try_range`.
    ///
    /// # Panics
    ///
    /// Panics if the row count cannot be converted to the integer type
    /// expected by `try_range`.
    fn region_map<V, F>(
        &self,
        func: &F,
        seqname: &str,
        start: Position,
        end: Position,
    ) -> Result<V, GRangesError>
    where
        F: for<'b> Fn(ArrayView2<'b, T>) -> V,
    {
        let seq = self.get_sequence(seqname)?;
        // The range indexes axis 0, so it must be validated against the number
        // of rows. `len()` on a 2-D array is rows * columns, which would let an
        // out-of-bounds range through to `slice`.
        let range = try_range(start, end, seq.nrows().try_into().unwrap())?;
        let view = seq.slice(s![range, ..]);
        Ok(func(view))
    }

    /// Return the length of sequence `seqname`, i.e. the number of rows of its
    /// array.
    ///
    /// The value is read from the array obtained via
    /// [`Sequences::get_sequence`], not from the stored sequence-length table.
    ///
    /// # Errors
    ///
    /// Propagates the error from [`Sequences::get_sequence`].
    ///
    /// # Panics
    ///
    /// Panics if the row count does not fit in a [`Position`].
    fn get_sequence_length(&self, seqname: &str) -> Result<Position, GRangesError> {
        let seq = self.get_sequence(seqname)?;
        // Rows, not total elements: see the note in `region_map`.
        Ok(seq.nrows().try_into().unwrap())
    }
}
// Test-only module; it currently contains no tests.
#[cfg(test)]
mod tests {}