mod dataset;
pub use dataset::{AnnDataSet, StackedAnnData};
use smallvec::SmallVec;
use crate::{
backend::{Backend, DataContainer, GroupOp, StoreOp},
container::{ArrayElem, Axis, AxisArrays, DataFrameElem, Dim, ElemCollection, Slot},
data::*,
traits::AnnDataOp,
};
use anyhow::{anyhow, ensure, Result};
use itertools::Itertools;
use std::path::{Path, PathBuf};
/// An annotated data container whose components live in a backend store `B::Store`.
///
/// Each component is held in a lockable slot/collection (every one of them is accessed
/// through `.lock()` elsewhere in this file), and the two `Dim` fields are handed by
/// reference to the axis-array constructors and are also updated by `subset`.
pub struct AnnData<B: Backend> {
/// Handle to the backing store; also the source of `filename()`.
pub(crate) file: B::Store,
/// Number of observations; `open` sets it from `X`'s first axis and the height of `obs`.
pub(crate) n_obs: Dim,
/// Number of variables; `open` sets it from `X`'s second axis and the height of `var`.
pub(crate) n_vars: Dim,
/// The main array stored under the key `"X"` (may be an empty slot).
pub(crate) x: ArrayElem<B>,
/// Data frame stored under `"obs"` (may be an empty slot).
pub(crate) obs: DataFrameElem<B>,
/// Arrays stored under `"obsm"`, constructed with `Axis::Row` and `n_obs`.
pub(crate) obsm: AxisArrays<B>,
/// Arrays stored under `"obsp"`, constructed with `Axis::Pairwise` and `n_obs`.
pub(crate) obsp: AxisArrays<B>,
/// Data frame stored under `"var"` (may be an empty slot).
pub(crate) var: DataFrameElem<B>,
/// Arrays stored under `"varm"`, constructed with `Axis::Row` and `n_vars`.
pub(crate) varm: AxisArrays<B>,
/// Arrays stored under `"varp"`, constructed with `Axis::Pairwise` and `n_vars`.
pub(crate) varp: AxisArrays<B>,
/// Keyed element collection stored under `"uns"`.
pub(crate) uns: ElemCollection<B>,
/// Arrays stored under `"layers"`, constructed with `Axis::RowColumn`, `n_obs` and `n_vars`.
pub(crate) layers: AxisArrays<B>,
}
/// `Debug` output is identical to the `Display` summary.
impl<B: Backend> std::fmt::Debug for AnnData<B> {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Forward the same formatter so the rendering is exactly what `Display` produces.
        <Self as std::fmt::Display>::fmt(self, formatter)
    }
}
/// Multi-line summary: a header with the dimensions and backing file, followed by one
/// line per non-empty component listing its column names / keys.
impl<B: Backend> std::fmt::Display for AnnData<B> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `Path::display` renders non-UTF-8 paths lossily instead of panicking, which a
        // formatter must never do; for valid UTF-8 paths the output is unchanged.
        write!(
            f,
            "AnnData object with n_obs x n_vars = {} x {} backed at '{}'",
            self.n_obs(),
            self.n_vars(),
            self.filename().display(),
        )?;

        // Data frames: list column names, skipping empty slots and frames without columns.
        if let Some(obs) = self
            .obs
            .lock()
            .as_ref()
            .map(|x| x.get_column_names())
            .filter(|names| !names.is_empty())
        {
            write!(f, "\n obs: '{}'", obs.into_iter().join("', '"))?;
        }
        if let Some(var) = self
            .var
            .lock()
            .as_ref()
            .map(|x| x.get_column_names())
            .filter(|names| !names.is_empty())
        {
            write!(f, "\n var: '{}'", var.into_iter().join("', '"))?;
        }

        // Keyed collections: list keys; a section whose joined key string is empty is
        // omitted entirely.
        if let Some(keys) = self
            .uns
            .lock()
            .as_ref()
            .map(|x| x.keys().join("', '"))
            .filter(|keys| !keys.is_empty())
        {
            write!(f, "\n uns: '{}'", keys)?;
        }
        if let Some(keys) = self
            .obsm
            .lock()
            .as_ref()
            .map(|x| x.keys().join("', '"))
            .filter(|keys| !keys.is_empty())
        {
            write!(f, "\n obsm: '{}'", keys)?;
        }
        if let Some(keys) = self
            .obsp
            .lock()
            .as_ref()
            .map(|x| x.keys().join("', '"))
            .filter(|keys| !keys.is_empty())
        {
            write!(f, "\n obsp: '{}'", keys)?;
        }
        if let Some(keys) = self
            .varm
            .lock()
            .as_ref()
            .map(|x| x.keys().join("', '"))
            .filter(|keys| !keys.is_empty())
        {
            write!(f, "\n varm: '{}'", keys)?;
        }
        if let Some(keys) = self
            .varp
            .lock()
            .as_ref()
            .map(|x| x.keys().join("', '"))
            .filter(|keys| !keys.is_empty())
        {
            write!(f, "\n varp: '{}'", keys)?;
        }
        if let Some(keys) = self
            .layers
            .lock()
            .as_ref()
            .map(|x| x.keys().join("', '"))
            .filter(|keys| !keys.is_empty())
        {
            write!(f, "\n layers: '{}'", keys)?;
        }
        Ok(())
    }
}
/// Creates the group `name` inside `store` and saves `MAPPING_ENCODING` onto it.
///
/// Returns the freshly created group, or the first error raised by either step.
pub(crate) fn new_mapping<G: GroupOp<B>, B: Backend>(store: &G, name: &str) -> Result<B::Group> {
    let mut mapping_group = store.new_group(name)?;
    // Tag the group with the mapping encoding before handing it back to the caller.
    MAPPING_ENCODING.save(&mut mapping_group)?;
    Ok(mapping_group)
}
/// Wraps `group` as the `obsm` collection: `Axis::Row` arrays sized by `n_obs`.
pub(crate) fn new_obsm<B: Backend>(group: B::Group, n_obs: &Dim) -> Result<AxisArrays<B>> {
    let axis = Axis::Row;
    AxisArrays::new(group, axis, n_obs, None)
}
/// Wraps `group` as the `obsp` collection: `Axis::Pairwise` arrays sized by `n_obs`.
pub(crate) fn new_obsp<B: Backend>(group: B::Group, n_obs: &Dim) -> Result<AxisArrays<B>> {
    let axis = Axis::Pairwise;
    AxisArrays::new(group, axis, n_obs, None)
}
/// Wraps `group` as the `varm` collection: `Axis::Row` arrays sized by `n_vars`.
pub(crate) fn new_varm<B: Backend>(group: B::Group, n_vars: &Dim) -> Result<AxisArrays<B>> {
    let axis = Axis::Row;
    AxisArrays::new(group, axis, n_vars, None)
}
/// Wraps `group` as the `varp` collection: `Axis::Pairwise` arrays sized by `n_vars`.
pub(crate) fn new_varp<B: Backend>(group: B::Group, n_vars: &Dim) -> Result<AxisArrays<B>> {
    let axis = Axis::Pairwise;
    AxisArrays::new(group, axis, n_vars, None)
}
/// Wraps `group` as the `layers` collection: `Axis::RowColumn` arrays constrained by
/// both `n_obs` and `n_vars`.
pub(crate) fn new_layers<B: Backend>(group: B::Group, n_obs: &Dim, n_vars: &Dim) -> Result<AxisArrays<B>> {
    let second_dim = Some(n_vars);
    AxisArrays::new(group, Axis::RowColumn, n_obs, second_dim)
}
impl<B: Backend> AnnData<B> {
pub fn get_x(&self) -> &ArrayElem<B> {
&self.x
}
pub fn get_obs(&self) -> &DataFrameElem<B> {
&self.obs
}
pub fn get_var(&self) -> &DataFrameElem<B> {
&self.var
}
/// Builds an `AnnData` on top of an already opened store.
///
/// * `X`, `obs` and `var` are loaded only if the corresponding key exists; otherwise the
///   slot is left empty. Each one that is present feeds its size into `n_obs` / `n_vars`
///   through `Dim::try_set`, whose error is propagated.
/// * For `obsm`, `obsp`, `varm`, `varp`, `uns` and `layers` the existing group is opened;
///   only if that fails is a new mapping group created. If both attempts fail, an empty
///   in-memory collection is used instead.
///
/// # Errors
/// Returns an error if an existence check, an element load, a dimension update or the
/// construction of a collection from an opened group fails.
pub fn open(file: B::Store) -> Result<Self> {
    let n_obs = Dim::empty();
    let n_vars = Dim::empty();

    let x = if file.exists("X")? {
        let x = ArrayElem::try_from(DataContainer::open(&file, "X")?)?;
        // NOTE(review): indexing assumes `X` has at least two axes — confirm upstream.
        n_obs.try_set(x.inner().shape()[0])?;
        n_vars.try_set(x.inner().shape()[1])?;
        x
    } else {
        Slot::none()
    };
    let obs = if file.exists("obs")? {
        let obs = DataFrameElem::try_from(DataContainer::open(&file, "obs")?)?;
        n_obs.try_set(obs.inner().height())?;
        obs
    } else {
        Slot::none()
    };
    let var = if file.exists("var")? {
        let var = DataFrameElem::try_from(DataContainer::open(&file, "var")?)?;
        n_vars.try_set(var.inner().height())?;
        var
    } else {
        Slot::none()
    };

    // `or_else` keeps group creation lazy: `new_mapping` runs only when opening failed.
    // The previous `.or(new_mapping(..))` evaluated its argument unconditionally, so a
    // create-group call was issued against the store even for groups that already exist.
    let obsm = match file.open_group("obsm").or_else(|_| new_mapping(&file, "obsm")) {
        Ok(group) => new_obsm(group, &n_obs)?,
        _ => AxisArrays::empty(),
    };
    let obsp = match file.open_group("obsp").or_else(|_| new_mapping(&file, "obsp")) {
        Ok(group) => new_obsp(group, &n_obs)?,
        _ => AxisArrays::empty(),
    };
    let varm = match file.open_group("varm").or_else(|_| new_mapping(&file, "varm")) {
        Ok(group) => new_varm(group, &n_vars)?,
        _ => AxisArrays::empty(),
    };
    let varp = match file.open_group("varp").or_else(|_| new_mapping(&file, "varp")) {
        Ok(group) => new_varp(group, &n_vars)?,
        _ => AxisArrays::empty(),
    };
    let uns = match file.open_group("uns").or_else(|_| new_mapping(&file, "uns")) {
        Ok(group) => ElemCollection::new(group)?,
        _ => ElemCollection::empty(),
    };
    let layers = match file.open_group("layers").or_else(|_| new_mapping(&file, "layers")) {
        Ok(group) => new_layers(group, &n_obs, &n_vars)?,
        _ => AxisArrays::empty(),
    };

    Ok(Self {
        file,
        n_obs,
        n_vars,
        x,
        obs,
        obsm,
        obsp,
        var,
        varm,
        varp,
        uns,
        layers,
    })
}
/// Creates a new store at `filename` and returns an empty `AnnData` backed by it.
///
/// `X`, `obs` and `var` start as empty slots; a mapping group is created in the store
/// for each of `obsm`, `obsp`, `varm`, `varp`, `uns` and `layers`.
///
/// # Errors
/// Propagates any failure from creating the store, a mapping group or a collection.
pub fn new<P: AsRef<Path>>(filename: P) -> Result<Self> {
    let file = B::new(filename)?;
    let n_obs = Dim::empty();
    let n_vars = Dim::empty();

    let x = Slot::none();
    let obs = Slot::none();
    let var = Slot::none();

    // Groups are created in the same order as before: obsm, obsp, varm, varp, uns, layers.
    let obsm = new_obsm(new_mapping(&file, "obsm")?, &n_obs)?;
    let obsp = new_obsp(new_mapping(&file, "obsp")?, &n_obs)?;
    let varm = new_varm(new_mapping(&file, "varm")?, &n_vars)?;
    let varp = new_varp(new_mapping(&file, "varp")?, &n_vars)?;
    let uns = ElemCollection::new(new_mapping(&file, "uns")?)?;
    let layers = new_layers(new_mapping(&file, "layers")?, &n_obs, &n_vars)?;

    Ok(Self {
        file,
        n_obs,
        n_vars,
        x,
        obs,
        obsm,
        obsp,
        var,
        varm,
        varp,
        uns,
        layers,
    })
}
/// Exports every non-empty component into a new store of backend `O` at `filename`.
///
/// Components are exported in the order `X`, `obs`, `var`, `obsm`, `obsp`, `varm`,
/// `varp`, `uns`, `layers`; empty slots are skipped. The new store is closed at the end.
///
/// # Errors
/// Propagates the first failure from creating the store, exporting a component or
/// closing the store.
pub fn write<O: Backend, P: AsRef<Path>>(&self, filename: P) -> Result<()> {
    let file = O::new(filename)?;
    // Both dimension guards stay alive until the function returns
    // (presumably to keep the dimensions stable during the export — confirm).
    let _obs_lock = self.n_obs.lock();
    let _vars_lock = self.n_vars.lock();

    if let Some(elem) = self.get_x().lock().as_mut() {
        elem.export::<O, _>(&file, "X")?;
    }
    if let Some(elem) = self.get_obs().lock().as_mut() {
        elem.export::<O, _>(&file, "obs")?;
    }
    if let Some(elem) = self.get_var().lock().as_mut() {
        elem.export::<O, _>(&file, "var")?;
    }
    if let Some(arrays) = self.obsm().lock().as_mut() {
        arrays.export::<O, _>(&file, "obsm")?;
    }
    if let Some(arrays) = self.obsp().lock().as_mut() {
        arrays.export::<O, _>(&file, "obsp")?;
    }
    if let Some(arrays) = self.varm().lock().as_mut() {
        arrays.export::<O, _>(&file, "varm")?;
    }
    if let Some(arrays) = self.varp().lock().as_mut() {
        arrays.export::<O, _>(&file, "varp")?;
    }
    if let Some(collection) = self.uns().lock().as_mut() {
        collection.export::<O, _>(&file, "uns")?;
    }
    if let Some(arrays) = self.layers().lock().as_mut() {
        arrays.export::<O, _>(&file, "layers")?;
    }

    file.close()?;
    Ok(())
}
pub fn write_select<O, S, P>(&self, selection: S, filename: P) -> Result<()>
where
O: Backend,
S: AsRef<[SelectInfoElem]>,
P: AsRef<Path>,
{
selection.as_ref()[0]
.bound_check(self.n_obs())
.map_err(|e| anyhow!("AnnData obs {}", e))?;
selection.as_ref()[1]
.bound_check(self.n_vars())
.map_err(|e| anyhow!("AnnData var {}", e))?;
let slice: SmallVec<[_; 3]> = selection.as_ref().iter().collect();
let file = O::new(filename)?;
let _obs_lock = self.n_obs.lock();
let _vars_lock = self.n_vars.lock();
self.get_x()
.lock()
.as_mut()
.map(|x| x.export_select::<O, _>(slice.as_slice(), &file, "X"))
.transpose()?;
self.get_obs()
.lock()
.as_mut()
.map(|x| x.export_axis(0, slice[0], &file, "obs"))
.transpose()?;
self.get_var()
.lock()
.as_mut()
.map(|x| x.export_axis(0, slice[1], &file, "var"))
.transpose()?;
self.uns()
.lock()
.as_mut()
.map(|x| x.export(&file, "uns"))
.transpose()?;
self.obsm()
.lock()
.as_mut()
.map(|x| x.export_select(&[slice[0]], &file, "obsm"))
.transpose()?;
self.obsp()
.lock()
.as_mut()
.map(|x| x.export_select(&[slice[0]], &file, "obsp"))
.transpose()?;
self.varm()
.lock()
.as_mut()
.map(|x| x.export_select(&[slice[1]], &file, "varm"))
.transpose()?;
self.varp()
.lock()
.as_mut()
.map(|x| x.export_select(&[slice[1]], &file, "varp"))
.transpose()?;
self.layers()
.lock()
.as_mut()
.map(|x| x.export_select(slice.as_slice(), &file, "layers"))
.transpose()?;
file.close()?;
Ok(())
}
/// Returns the path reported by the backing store.
pub fn filename(&self) -> PathBuf {
    let store = &self.file;
    store.filename()
}
/// Releases every component and then closes the backing store.
///
/// `X`, `obs` and `var` are dropped directly. For each keyed collection the contained
/// elements are dropped first, then the collection itself. `layers` is now released
/// like the other collections; it was previously skipped, leaving its elements alive
/// when the store was closed.
///
/// # Errors
/// Returns whatever error closing the store produces.
pub fn close(self) -> Result<()> {
    macro_rules! close {
        ($($name:ident),*) => {
            $(
                // Drop the members before the collection that owns them.
                if let Some(elems) = self.$name.lock().as_ref() {
                    elems.values().for_each(|elem| elem.drop());
                }
                self.$name.drop();
            )*
        };
    }
    self.x.drop();
    self.obs.drop();
    self.var.drop();
    close!(obsm, obsp, varm, varp, uns, layers);
    self.file.close()
}
/// Subsets this object in place with a two-element selection `[obs, var]`.
///
/// `X` and `layers` are subset with both selections; `obs`, `obsm`, `obsp` with the
/// observation selection; `var`, `varm`, `varp` with the variable selection. Empty
/// slots are skipped. Afterwards each dimension that is already set is updated to the
/// length of its selection.
///
/// # Errors
/// Fails if the selection does not have exactly two elements, or if subsetting any
/// component fails (components processed before the failure stay subset).
pub fn subset<S>(&self, selection: S) -> Result<()>
where
    S: AsRef<[SelectInfoElem]>,
{
    // Take both dimension guards up front; they are updated at the very end.
    let mut n_obs_guard = self.n_obs.lock();
    let mut n_vars_guard = self.n_vars.lock();

    let slice = selection.as_ref();
    ensure!(
        slice.len() == 2,
        format!("subset only supports 2D selections, got {}", slice.len())
    );
    let (obs_ix, var_ix) = (&slice[0], &slice[1]);

    if let Some(x) = self.x.lock().as_mut() {
        x.subset(slice)?;
    }
    if let Some(obs) = self.obs.lock().as_mut() {
        obs.subset_axis(0, obs_ix)?;
    }
    if let Some(obsm) = self.obsm.lock().as_mut() {
        obsm.subset(&[obs_ix])?;
    }
    if let Some(obsp) = self.obsp.lock().as_mut() {
        obsp.subset(&[obs_ix])?;
    }
    if let Some(var) = self.var.lock().as_mut() {
        var.subset_axis(0, var_ix)?;
    }
    if let Some(varm) = self.varm.lock().as_mut() {
        varm.subset(&[var_ix])?;
    }
    if let Some(varp) = self.varp.lock().as_mut() {
        varp.subset(&[var_ix])?;
    }
    if let Some(layers) = self.layers.lock().as_mut() {
        layers.subset(&[obs_ix, var_ix])?;
    }

    // An unset dimension stays unset; a set one shrinks to the selection's length.
    if !n_obs_guard.is_empty() {
        let new_n_obs = SelectInfoElemBounds::new(obs_ix, n_obs_guard.get()).len();
        n_obs_guard.set(new_n_obs);
    }
    if !n_vars_guard.is_empty() {
        let new_n_vars = SelectInfoElemBounds::new(var_ix, n_vars_guard.get()).len();
        n_vars_guard.set(new_n_vars);
    }
    Ok(())
}
}