use std::collections::HashMap;
use std::collections::HashSet;
use std::ops::Index;
use serde::{Deserialize, Serialize};
use crate::collections::{Feature, UniqueCheck};
/// An ordered collection of [`Feature`] values.
///
/// Insertion order is preserved and duplicate ids are allowed; the type is a
/// thin wrapper around a `Vec<Feature>` that can be serialized and
/// deserialized with serde.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FeatureList {
    /// Backing storage, in insertion order.
    genes: Vec<Feature>,
}
impl Default for FeatureList {
fn default() -> Self {
Self::new()
}
}
/// Construction, element access, uniqueness checking and id-based set
/// operations for [`FeatureList`].
impl FeatureList {
    /// Creates an empty list.
    pub fn new() -> Self {
        Self { genes: Vec::new() }
    }

    /// Wraps an existing vector of features, taking ownership of it.
    ///
    /// Note: the path `FeatureList::from(..)` always resolves to this inherent
    /// function, so the `From<Vec<String>>` / `From<Vec<&str>>` conversions
    /// have to be reached through `.into()` or
    /// `<FeatureList as From<_>>::from(..)`.
    pub fn from(genes: Vec<Feature>) -> Self {
        Self { genes }
    }

    /// Appends `feature` to the end of the list.
    pub fn push(&mut self, feature: Feature) {
        self.genes.push(feature);
    }

    /// Removes and returns the last feature, or `None` if the list is empty.
    pub fn pop(&mut self) -> Option<Feature> {
        self.genes.pop()
    }

    /// Checks whether every feature id occurs at most once.
    ///
    /// Returns [`UniqueCheck::Unique`] when no id repeats. Otherwise returns
    /// [`UniqueCheck::Duplicates`] carrying a map from each repeated id to the
    /// ascending list of positions at which it occurs.
    pub fn check_unique(&self) -> UniqueCheck {
        // Group the positions of every feature by its id.
        let mut seen: HashMap<&str, Vec<usize>> = HashMap::new();
        for (index, feature) in self.genes.iter().enumerate() {
            seen.entry(feature.id()).or_default().push(index);
        }

        // Only ids seen more than once are reported.
        let duplicates: HashMap<String, Vec<usize>> = seen
            .into_iter()
            .filter(|(_, indices)| indices.len() > 1)
            .map(|(id, indices)| (id.to_string(), indices))
            .collect();

        if duplicates.is_empty() {
            UniqueCheck::Unique
        } else {
            UniqueCheck::Duplicates(duplicates)
        }
    }

    /// Returns the features as a slice, in insertion order.
    pub fn genes(&self) -> &[Feature] {
        &self.genes
    }

    /// Returns the number of features in the list.
    pub fn len(&self) -> usize {
        self.genes.len()
    }

    /// Returns `true` when the list holds no features.
    pub fn is_empty(&self) -> bool {
        self.genes.is_empty()
    }

    /// Removes and returns the feature at `index`, shifting later features
    /// one position to the left.
    ///
    /// # Errors
    ///
    /// Returns a message describing the index and the current length when
    /// `index` is out of bounds; the list is left untouched in that case.
    pub fn remove(&mut self, index: usize) -> Result<Feature, String> {
        if index >= self.genes.len() {
            return Err(format!(
                "Index out of bounds: {} (length: {})",
                index,
                self.genes.len()
            ));
        }
        Ok(self.genes.remove(index))
    }

    /// Returns the features of `self` whose id also occurs in `other`.
    ///
    /// The result keeps the order of `self` and keeps repeated entries of
    /// `self`; features are compared by id only.
    pub fn intersect<'a>(&'a self, other: &FeatureList) -> Vec<&'a Feature> {
        // Every feature of `self` trivially has its id in `self`, so only the
        // ids of `other` need to be indexed.
        let other_ids: HashSet<_> = other.genes.iter().map(|g| g.id()).collect();
        self.genes
            .iter()
            .filter(|feature| other_ids.contains(feature.id()))
            .collect()
    }

    /// Returns the features of `self` whose id does not occur in `other`.
    ///
    /// With `symmetric` set, the features of `other` whose id does not occur
    /// in `self` are appended after them. Order within each list is
    /// preserved, repeated entries are kept, and features are compared by id
    /// only.
    pub fn difference<'a>(&'a self, other: &'a FeatureList, symmetric: bool) -> Vec<&'a Feature> {
        let other_ids: HashSet<_> = other.genes.iter().map(|g| g.id()).collect();
        // One hash lookup per feature instead of re-walking the set
        // difference for every element.
        let only_in_self = self
            .genes
            .iter()
            .filter(|feature| !other_ids.contains(feature.id()));

        if symmetric {
            let self_ids: HashSet<_> = self.genes.iter().map(|g| g.id()).collect();
            only_in_self
                .chain(
                    other
                        .genes
                        .iter()
                        .filter(|feature| !self_ids.contains(feature.id())),
                )
                .collect()
        } else {
            only_in_self.collect()
        }
    }

    /// Returns an iterator that yields the features by reference, front to
    /// back.
    pub fn iter(&self) -> FeatureListIterator<'_> {
        FeatureListIterator {
            feature_list: self,
            index: 0,
        }
    }
}
/// Borrowing iterator over the features of a [`FeatureList`], front to back.
///
/// Holds a shared reference to the list together with the position of the
/// next feature to yield. Cloning it produces an independent iterator at the
/// same position.
#[derive(Debug, Clone)]
pub struct FeatureListIterator<'a> {
    /// List being traversed.
    feature_list: &'a FeatureList,
    /// Position of the next feature to yield.
    index: usize,
}
/// Yields `&Feature` items in list order.
impl<'a> Iterator for FeatureListIterator<'a> {
    type Item = &'a Feature;

    /// Returns the next feature, or `None` once the end of the list has been
    /// reached.
    fn next(&mut self) -> Option<Self::Item> {
        // Checked access: past the end this returns `None` without advancing.
        let item = self.feature_list.genes.get(self.index)?;
        self.index += 1;
        Some(item)
    }

    /// Reports the exact number of features still to be yielded, so that
    /// consumers such as `collect` can size their buffers up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.feature_list.genes.len().saturating_sub(self.index);
        (remaining, Some(remaining))
    }
}
impl<'a> IntoIterator for &'a FeatureList {
type Item = &'a Feature;
type IntoIter = FeatureListIterator<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
/// Builds a list from owned id strings, one feature per id, in the given
/// order.
impl From<Vec<String>> for FeatureList {
    fn from(ids: Vec<String>) -> Self {
        // Each feature is created through the `&str` conversion of `Feature`.
        Self {
            genes: ids.iter().map(|id| Feature::from(id.as_str())).collect(),
        }
    }
}
/// Builds a list from borrowed id strings, one feature per id, in the given
/// order.
impl From<Vec<&str>> for FeatureList {
    fn from(ids: Vec<&str>) -> Self {
        let mut genes: Vec<Feature> = Vec::new();
        for id in ids {
            genes.push(Feature::from(id));
        }
        Self { genes }
    }
}
/// Positional access with `list[i]`.
///
/// # Panics
///
/// Panics when `index` is out of bounds, exactly like indexing the underlying
/// `Vec`.
impl Index<usize> for FeatureList {
    type Output = Feature;

    fn index(&self, index: usize) -> &Self::Output {
        Index::index(&self.genes, index)
    }
}