mod dataset;
pub(crate) mod elem_io;
pub use dataset::{AnnDataSet, StackedAnnData};
use elem_io::{new_mapping, open_layers, open_obsm, open_obsp, open_varm, open_varp};
use crate::{
ArrayElemOp, AxisArraysOp, ElemCollectionOp,
backend::{Backend, DataContainer, GroupOp, StoreOp},
container::{ArrayElem, AxisArrays, DataFrameElem, Dim, ElemCollection, Slot},
data::*,
traits::AnnDataOp,
};
use anyhow::{Result, ensure};
use itertools::Itertools;
use std::{
collections::HashSet,
path::{Path, PathBuf},
};
/// An annotated data matrix whose components live in a store of backend `B`.
///
/// Every component is held as a handle (`ArrayElem`, `DataFrameElem`,
/// `AxisArrays`, `ElemCollection`) rather than as in-memory data; the handles
/// are created by `open`/`new` and released by `close`.
pub struct AnnData<B: Backend> {
/// Backing store; `close` closes it after releasing the component handles.
pub(crate) file: B::Store,
/// Number of observations. `open` fills it from the first dimension of `X`
/// and from the height of `obs`.
pub(crate) n_obs: Dim,
/// Number of variables. `open` fills it from the second dimension of `X`
/// and from the height of `var`.
pub(crate) n_vars: Dim,
/// The main data matrix, stored under the key `"X"`.
pub(crate) x: ArrayElem<B>,
/// Observation annotations, stored under the key `"obs"`.
pub(crate) obs: DataFrameElem<B>,
/// Arrays stored under the `"obsm"` group, opened against `n_obs`.
pub(crate) obsm: AxisArrays<B>,
/// Arrays stored under the `"obsp"` group, opened against `n_obs`.
pub(crate) obsp: AxisArrays<B>,
/// Variable annotations, stored under the key `"var"`.
pub(crate) var: DataFrameElem<B>,
/// Arrays stored under the `"varm"` group, opened against `n_vars`.
pub(crate) varm: AxisArrays<B>,
/// Arrays stored under the `"varp"` group, opened against `n_vars`.
pub(crate) varp: AxisArrays<B>,
/// Free-form elements stored under the `"uns"` group.
pub(crate) uns: ElemCollection<B>,
/// Arrays stored under the `"layers"` group, opened against both `n_obs`
/// and `n_vars`.
pub(crate) layers: AxisArrays<B>,
}
/// `Debug` output is intentionally the same text as `Display`: the summary of
/// dimensions and component keys.
impl<B: Backend> std::fmt::Debug for AnnData<B> {
    /// Forwards to the `Display` implementation with the caller's formatter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        <Self as std::fmt::Display>::fmt(self, f)
    }
}
/// Human-readable summary: a header line with the dimensions and backing
/// path, followed by one line per non-empty component listing its keys.
impl<B: Backend> std::fmt::Display for AnnData<B> {
    /// Writes the summary to `f`.
    ///
    /// Components that are unset, or that have no columns/keys, are omitted.
    /// The backing path is rendered with `Path::display`, so a path that is
    /// not valid UTF-8 is shown lossily instead of causing a panic.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "AnnData object with n_obs x n_vars = {} x {} backed at '{}'",
            self.n_obs(),
            self.n_vars(),
            self.filename().display(),
        )?;

        // Data frames: list column names, skipping frames without columns.
        if let Some(obs) = self.obs.lock().as_ref().map(|x| x.get_column_names())
            && !obs.is_empty()
        {
            write!(f, "\n obs: '{}'", obs.into_iter().join("', '"))?;
        }
        if let Some(var) = self.var.lock().as_ref().map(|x| x.get_column_names())
            && !var.is_empty()
        {
            write!(f, "\n var: '{}'", var.into_iter().join("', '"))?;
        }

        // Keyed collections: each caller passes its keys already joined; a
        // missing collection (`None`) or an empty joined string prints nothing.
        let mut write_keys = |label: &str, keys: Option<String>| -> std::fmt::Result {
            match keys {
                Some(keys) if !keys.is_empty() => write!(f, "\n {label}: '{keys}'"),
                _ => Ok(()),
            }
        };
        write_keys("uns", self.uns.lock().as_ref().map(|x| x.keys().join("', '")))?;
        write_keys("obsm", self.obsm.lock().as_ref().map(|x| x.keys().join("', '")))?;
        write_keys("obsp", self.obsp.lock().as_ref().map(|x| x.keys().join("', '")))?;
        write_keys("varm", self.varm.lock().as_ref().map(|x| x.keys().join("', '")))?;
        write_keys("varp", self.varp.lock().as_ref().map(|x| x.keys().join("', '")))?;
        write_keys(
            "layers",
            self.layers.lock().as_ref().map(|x| x.keys().join("', '")),
        )?;
        Ok(())
    }
}
impl<B: Backend> AnnData<B> {
/// Returns a reference to the handle of the main data matrix `X`.
pub fn get_x(&self) -> &ArrayElem<B> {
&self.x
}
/// Returns a reference to the handle of the `obs` data frame.
pub fn get_obs(&self) -> &DataFrameElem<B> {
&self.obs
}
/// Returns a reference to the handle of the `var` data frame.
pub fn get_var(&self) -> &DataFrameElem<B> {
&self.var
}
/// Builds an `AnnData` from an already-opened store.
///
/// `X`, `obs` and `var` are loaded when the corresponding key exists and are
/// left unset otherwise; each one that is present contributes to `n_obs` /
/// `n_vars` through `Dim::try_set`.
///
/// For the keyed groups (`obsm`, `obsp`, `varm`, `varp`, `uns`, `layers`) the
/// existing group is opened; only if that fails is a fresh mapping created.
/// If both steps fail the component is left empty rather than failing the
/// whole call.
///
/// # Errors
///
/// Returns an error if an existence check fails, if `X`/`obs`/`var` cannot be
/// opened, if a dimension cannot be set, or if a group that was obtained
/// cannot be turned into its container.
pub fn open(file: B::Store) -> Result<Self> {
    let n_obs = Dim::empty();
    let n_vars = Dim::empty();

    let x = if file.exists("X")? {
        let x = ArrayElem::try_from(DataContainer::open(&file, "X")?)?;
        n_obs.try_set(x.inner().shape()[0])?;
        n_vars.try_set(x.inner().shape()[1])?;
        x
    } else {
        Slot::none()
    };

    let obs = if file.exists("obs")? {
        let obs = DataFrameElem::try_from(DataContainer::open(&file, "obs")?)?;
        n_obs.try_set(obs.inner().height())?;
        obs
    } else {
        Slot::none()
    };

    let var = if file.exists("var")? {
        let var = DataFrameElem::try_from(DataContainer::open(&file, "var")?)?;
        n_vars.try_set(var.inner().height())?;
        var
    } else {
        Slot::none()
    };

    // `or_else` (not `or`) so that `new_mapping` is only invoked when the
    // group could not be opened; `or` would evaluate it unconditionally and
    // attempt to create a group on top of one that already exists.
    let obsm = match file
        .open_group("obsm")
        .or_else(|_| new_mapping(&file, "obsm"))
    {
        Ok(group) => open_obsm(group, Some(&n_obs))?,
        Err(_) => AxisArrays::empty(),
    };
    let obsp = match file
        .open_group("obsp")
        .or_else(|_| new_mapping(&file, "obsp"))
    {
        Ok(group) => open_obsp(group, Some(&n_obs))?,
        Err(_) => AxisArrays::empty(),
    };
    let varm = match file
        .open_group("varm")
        .or_else(|_| new_mapping(&file, "varm"))
    {
        Ok(group) => open_varm(group, Some(&n_vars))?,
        Err(_) => AxisArrays::empty(),
    };
    let varp = match file
        .open_group("varp")
        .or_else(|_| new_mapping(&file, "varp"))
    {
        Ok(group) => open_varp(group, Some(&n_vars))?,
        Err(_) => AxisArrays::empty(),
    };
    let uns = match file
        .open_group("uns")
        .or_else(|_| new_mapping(&file, "uns"))
    {
        Ok(group) => ElemCollection::new(group)?,
        Err(_) => ElemCollection::empty(),
    };
    let layers = match file
        .open_group("layers")
        .or_else(|_| new_mapping(&file, "layers"))
    {
        Ok(group) => open_layers(group, Some(&n_obs), Some(&n_vars))?,
        Err(_) => AxisArrays::empty(),
    };

    Ok(Self {
        file,
        n_obs,
        n_vars,
        x,
        obs,
        obsm,
        obsp,
        var,
        varm,
        varp,
        uns,
        layers,
    })
}
/// Creates a new store at `filename` and returns an empty `AnnData` on it.
///
/// `X`, `obs` and `var` start unset and both dimensions start empty. A fresh
/// mapping is created in the store for each of `obsm`, `obsp`, `varm`,
/// `varp`, `uns` and `layers`, in that order.
///
/// # Errors
///
/// Returns an error if the store or any of the mappings cannot be created.
pub fn new<P: AsRef<Path>>(filename: P) -> Result<Self> {
    let file = B::new(filename)?;
    let n_obs = Dim::empty();
    let n_vars = Dim::empty();

    // Mappings are created in the same order as before so the on-disk
    // layout and the first failing step are unchanged.
    let obsm = open_obsm(new_mapping(&file, "obsm")?, Some(&n_obs))?;
    let obsp = open_obsp(new_mapping(&file, "obsp")?, Some(&n_obs))?;
    let varm = open_varm(new_mapping(&file, "varm")?, Some(&n_vars))?;
    let varp = open_varp(new_mapping(&file, "varp")?, Some(&n_vars))?;
    let uns = ElemCollection::new(new_mapping(&file, "uns")?)?;
    let layers = open_layers(new_mapping(&file, "layers")?, Some(&n_obs), Some(&n_vars))?;

    Ok(Self {
        file,
        n_obs,
        n_vars,
        x: Slot::none(),
        obs: Slot::none(),
        obsm,
        obsp,
        var: Slot::none(),
        varm,
        varp,
        uns,
        layers,
    })
}
/// Copies this dataset into a new file at `filename` using backend `O`.
///
/// * `partial` — when `Some`, only the components whose names appear in the
///   set are written; when `None`, all of `X`, `obs`, `var`, `obsm`, `obsp`,
///   `varm`, `varp`, `uns` and `layers` are written. `n_obs` and `n_vars`
///   are always copied.
/// * `chunk_size` — when `Some`, `X` is streamed through an iterator with
///   that chunk size; when `None`, `X` is read and written in one piece.
///
/// The new file is closed before returning.
///
/// # Errors
///
/// Returns an error if the output cannot be created, or if any read, write
/// or the final close fails.
pub fn write<O: Backend, P: AsRef<Path>>(
    &self,
    filename: P,
    partial: Option<HashSet<String>>,
    chunk_size: Option<usize>,
) -> Result<()> {
    const ALL_FIELDS: [&str; 9] = [
        "X", "obs", "var", "obsm", "obsp", "varm", "varp", "uns", "layers",
    ];
    let saved_fields: HashSet<String> =
        partial.unwrap_or_else(|| ALL_FIELDS.into_iter().map(String::from).collect());
    let wanted = |field: &str| saved_fields.contains(field);

    let adata = AnnData::<O>::new(filename)?;
    adata.set_n_obs(self.n_obs())?;
    adata.set_n_vars(self.n_vars())?;

    if !self.get_obs().is_none() && wanted("obs") {
        adata.set_obs_names(self.obs_names())?;
        adata.set_obs(self.read_obs()?)?;
    }
    if !self.get_var().is_none() && wanted("var") {
        adata.set_var_names(self.var_names())?;
        adata.set_var(self.read_var()?)?;
    }
    if !self.x().is_none() && wanted("X") {
        match chunk_size {
            Some(chunk_size) => {
                adata.set_x_from_iter(
                    self.x().iter::<ArrayData>(chunk_size).map(|chunk| chunk.0),
                )?;
            }
            None => {
                adata.set_x(self.x().get::<ArrayData>()?.unwrap())?;
            }
        }
    }

    if wanted("obsm") {
        adata.set_obsm(self.obsm().iter_item::<ArrayData>())?;
    }
    if wanted("obsp") {
        adata.set_obsp(self.obsp().iter_item::<ArrayData>())?;
    }
    if wanted("varm") {
        adata.set_varm(self.varm().iter_item::<ArrayData>())?;
    }
    if wanted("varp") {
        adata.set_varp(self.varp().iter_item::<ArrayData>())?;
    }
    if wanted("uns") {
        adata.set_uns(self.uns().iter_item::<Data>())?;
    }
    if wanted("layers") {
        adata.set_layers(self.layers().iter_item::<ArrayData>())?;
    }

    adata.close()
}
pub fn write_select<O, S, P>(&self, selection: S, filename: P) -> Result<()>
where
O: Backend,
S: AsRef<[SelectInfoElem]>,
P: AsRef<Path>,
{
let adata = AnnData::<O>::new(filename)?;
let obs_idx = &selection.as_ref()[0];
let var_idx = &selection.as_ref()[1];
let full = SelectInfoElem::full();
let n_obs = SelectInfoElemBounds::new(&obs_idx, self.n_obs()).len();
let n_vars = SelectInfoElemBounds::new(&var_idx, self.n_vars()).len();
adata.set_n_obs(n_obs)?;
adata.set_n_vars(n_vars)?;
if !self.get_obs().is_none() {
adata.set_obs_names(self.obs_names().select(obs_idx))?;
let obs = Selectable::select_axis(&self.read_obs()?, 0, obs_idx);
adata.set_obs(obs)?;
}
if !self.get_var().is_none() {
adata.set_var_names(self.var_names().select(var_idx))?;
let var = Selectable::select_axis(&self.read_var()?, 0, var_idx);
adata.set_var(var)?;
}
if let Some(x) = self.x().slice::<ArrayData, _>(&selection)? {
adata.set_x(x)?;
}
adata.set_obsm(
self.obsm()
.iter_item_slice::<ArrayData, _>(&[obs_idx.clone(), full.clone()]),
)?;
adata.set_obsp(
self.obsp()
.iter_item_slice::<ArrayData, _>(&[obs_idx.clone(), obs_idx.clone()]),
)?;
adata.set_varm(
self.varm()
.iter_item_slice::<ArrayData, _>(&[var_idx.clone(), full]),
)?;
adata.set_varp(
self.varp()
.iter_item_slice::<ArrayData, _>(&[var_idx.clone(), var_idx.clone()]),
)?;
adata.set_uns(self.uns().iter_item::<Data>())?;
adata.set_layers(self.layers().iter_item_slice::<ArrayData, _>(&selection))?;
adata.close()
}
/// Returns the path reported by the backing store.
pub fn filename(&self) -> PathBuf {
self.file.filename()
}
/// Releases every component handle and then closes the backing store.
///
/// `X`, `obs` and `var` are dropped directly. For each keyed collection
/// (`obsm`, `obsp`, `varm`, `varp`, `uns`, `layers`) the contained elements
/// are dropped first and then the collection itself, so that no handle into
/// the store is still held when the store is closed.
///
/// # Errors
///
/// Returns the error reported by the store's `close`, if any.
pub fn close(self) -> Result<()> {
    // Drops all elements of a collection, then the collection handle.
    macro_rules! close {
        ($($name:ident),*) => {
            $(
                if let Some(elems) = self.$name.lock().as_ref() {
                    elems.values().for_each(|elem| elem.drop());
                }
                self.$name.drop();
            )*
        };
    }

    self.x.drop();
    self.obs.drop();
    self.var.drop();
    // `layers` must be released here as well; it was previously left out of
    // this list and stayed alive until after the store had been closed.
    close!(obsm, obsp, varm, varp, uns, layers);
    self.file.close()
}
/// Subsets the dataset in place.
///
/// `selection` must contain exactly two elements: the first selects
/// observations, the second selects variables. Every component that is set
/// is subset accordingly, and afterwards any non-empty dimension is updated
/// to the length of its selection. Both dimension locks are held for the
/// whole operation.
///
/// # Errors
///
/// Returns an error if `selection` does not have exactly two elements, or if
/// subsetting any component fails. Components processed before a failure
/// have already been modified.
pub fn subset<S>(&self, selection: S) -> Result<()>
where
    S: AsRef<[SelectInfoElem]>,
{
    let mut obs_lock = self.n_obs.lock();
    let mut vars_lock = self.n_vars.lock();

    let slice = selection.as_ref();
    ensure!(
        slice.len() == 2,
        "subset only supports 2D selections, got {}",
        slice.len()
    );
    let (obs_ix, var_ix) = (&slice[0], &slice[1]);

    if let Some(x) = self.x.lock().as_mut() {
        x.subset(slice)?;
    }

    // Observation-aligned components.
    if let Some(obs) = self.obs.lock().as_mut() {
        obs.subset_axis(0, obs_ix)?;
    }
    if let Some(obsm) = self.obsm.lock().as_mut() {
        obsm.subset(&[obs_ix])?;
    }
    if let Some(obsp) = self.obsp.lock().as_mut() {
        obsp.subset(&[obs_ix])?;
    }

    // Variable-aligned components.
    if let Some(var) = self.var.lock().as_mut() {
        var.subset_axis(0, var_ix)?;
    }
    if let Some(varm) = self.varm.lock().as_mut() {
        varm.subset(&[var_ix])?;
    }
    if let Some(varp) = self.varp.lock().as_mut() {
        varp.subset(&[var_ix])?;
    }

    if let Some(layers) = self.layers.lock().as_mut() {
        layers.subset(&[obs_ix, var_ix])?;
    }

    // Only dimensions that were already set get a new value.
    if !obs_lock.is_empty() {
        let new_n_obs = SelectInfoElemBounds::new(obs_ix, obs_lock.get()).len();
        obs_lock.set(new_n_obs);
    }
    if !vars_lock.is_empty() {
        let new_n_vars = SelectInfoElemBounds::new(var_ix, vars_lock.get()).len();
        vars_lock.set(new_n_vars);
    }
    Ok(())
}
}