anndata_memory/ad/
mod.rs

1use anndata::{
2    container::{Axis, Dim},
3    data::{DataFrameIndex, SelectInfoElem},
4    ArrayData, HasShape,
5};
6use helpers::IMAxisArrays;
7use log::{log, Level};
8use polars::{frame::DataFrame, prelude::Column};
9
10use crate::{base::DeepClone, IMArrayElement, IMDataFrameElement, IMElementCollection};
11
12pub(crate) mod helpers;
13
14pub struct IMAnnData {
15    /// Number of observations (rows).
16    pub(crate) n_obs: Dim,
17    /// Number of variables (columns).
18    pub(crate) n_vars: Dim,
19    /// Data matrix.
20    x: IMArrayElement,
21    /// Observations metadata.
22    obs: IMDataFrameElement,
23    /// Observation multi-dimensional annotation.
24    obsm: IMAxisArrays,
25    /// Observation pairwise annotation.
26    obsp: IMAxisArrays,
27    /// Variables metadata.
28    var: IMDataFrameElement,
29    /// Variable multi-dimensional annotation.
30    varm: IMAxisArrays,
31    /// Variable pairwise annotation.
32    varp: IMAxisArrays,
33    /// Unstructured annotation.
34    uns: IMElementCollection,
35    /// Layers of data.
36    layers: IMAxisArrays,
37}
38
39impl IMAnnData {
40    /// Creates a new `IMAnnData` instance.
41    ///
42    /// # Arguments
43    ///
44    /// * `x` - Main data matrix.
45    /// * `obs` - Observations metadata.
46    /// * `var` - Variables metadata.
47    ///
48    /// # Returns
49    ///
50    /// Returns `Ok(IMAnnData)` if dimensions match, otherwise returns an `Err`.
51    ///
52    /// # Errors
53    ///
54    /// Returns an error if dimensions mismatch between `x`, `obs`, and `var`.
55    pub fn new(
56        x: IMArrayElement,
57        obs: IMDataFrameElement,
58        var: IMDataFrameElement,
59    ) -> anyhow::Result<Self> {
60        let n_obs = Dim::new(obs.get_data().height());
61        let n_vars = Dim::new(var.get_data().height());
62        // Validate dimensions
63        let x_shape = x.get_shape()?;
64        if x_shape[0] != n_obs.get() || x_shape[1] != n_vars.get() {
65            return Err(anyhow::anyhow!("Dimensions mismatch"));
66        }
67        Ok(Self {
68            n_obs: n_obs.clone(),
69            n_vars: n_vars.clone(),
70            x,
71            obs,
72            var,
73            obsm: IMAxisArrays::new(Axis::Row, n_obs.clone(), None),
74            obsp: IMAxisArrays::new(Axis::Pairwise, n_obs.clone(), None),
75            varm: IMAxisArrays::new(Axis::Row, n_vars.clone(), None),
76            varp: IMAxisArrays::new(Axis::Pairwise, n_vars.clone(), None),
77            uns: IMElementCollection::new_empty(),
78            layers: IMAxisArrays::new(Axis::RowColumn, n_obs.clone(), Some(n_vars.clone())),
79        })
80    }
81
82    /// Creates a new basic `IMAnnData` instance from a sparse matrix and index names.
83    ///
84    /// # Arguments
85    ///
86    /// * `matrix` - A sparse matrix (CsrArray) containing the main data.
87    /// * `obs_names` - Names for the observations (rows).
88    /// * `var_names` - Names for the variables (columns).
89    ///
90    /// # Returns
91    ///
92    /// Returns `Result<IMAnnData>` if successful, otherwise returns an `Err`.
93    ///
94    /// # Errors
95    ///
96    /// Returns an error if there's a mismatch in dimensions or if DataFrame creation fails.
97    pub fn new_basic(
98        matrix: ArrayData,
99        obs_names: Vec<String>,
100        var_names: Vec<String>,
101    ) -> anyhow::Result<Self> {
102        let s = matrix.shape();
103        let n_obs = s[0];
104        let n_vars = s[1];
105
106        // Validate dimensions
107        if n_obs != obs_names.len() || n_vars != var_names.len() {
108            return Err(anyhow::anyhow!(
109                "Dimensions mismatch between matrix and index names"
110            ));
111        }
112
113        // Create basic obs DataFrame and IMDataFrameElement
114        let obs_df = DataFrame::new(vec![Column::new("index".into(), &obs_names)])?;
115        let obs_index: DataFrameIndex = obs_names.into();
116        let obs = IMDataFrameElement::new(obs_df, obs_index);
117
118        // Create basic var DataFrame and IMDataFrameElement
119        let var_df = DataFrame::new(vec![Column::new("index".into(), &var_names)])?;
120        let var_index: DataFrameIndex = var_names.into();
121        let var = IMDataFrameElement::new(var_df, var_index);
122
123        // Create the IMAnnData object
124        IMAnnData::new(IMArrayElement::new(matrix), obs, var)
125    }
126
127    pub fn new_extended(
128        matrix: ArrayData,
129        obs_names: Vec<String>,
130        var_names: Vec<String>,
131        obs_df: DataFrame,
132        var_df: DataFrame,
133    ) -> anyhow::Result<Self> {
134        let s = matrix.shape();
135        let n_obs = s[0];
136        let n_vars = s[1];
137
138        // Validate dimensions
139        if n_obs != obs_names.len() || n_vars != var_names.len() {
140            return Err(anyhow::anyhow!(
141                "Dimensions mismatch between matrix and index names"
142            ));
143        }
144
145        // Create basic obs DataFrame and IMDataFrameElement
146        let obs_index: DataFrameIndex = obs_names.into();
147        let obs = IMDataFrameElement::new(obs_df, obs_index);
148
149        // Create basic var DataFrame and IMDataFrameElement
150        let var_index: DataFrameIndex = var_names.into();
151        let var = IMDataFrameElement::new(var_df, var_index);
152
153        // Create the IMAnnData object
154        IMAnnData::new(IMArrayElement::new(matrix), obs, var)
155    }
156
157    /// Returns the number of observations.
158    pub fn n_obs(&self) -> usize {
159        self.n_obs.get()
160    }
161
162    /// Returns the number of variables.
163    pub fn n_vars(&self) -> usize {
164        self.n_vars.get()
165    }
166
167    pub fn obs_names(&self) -> Vec<String> {
168        self.obs.get_index().into_vec()
169    }
170
171    pub fn var_names(&self) -> Vec<String> {
172        self.var.get_index().into_vec()
173    }
174
175    /// Returns a shallow clone of the main data matrix.
176    ///
177    /// # Notes
178    ///
179    /// This method returns a new `IMArrayElement` that shares the same underlying data with the original.
180    /// Modifications to the returned `IMArrayElement` will affect the original data.
181    pub fn x(&self) -> IMArrayElement {
182        self.x.clone()
183    }
184
185    /// Returns a shallow clone of the observations metadata.
186    ///
187    /// # Notes
188    ///
189    /// This method returns a new `IMDataFrameElement` that shares the same underlying data with the original.
190    /// Modifications to the returned `IMDataFrameElement` will affect the original data.
191    pub fn obs(&self) -> IMDataFrameElement {
192        self.obs.clone()
193    }
194
195    /// Returns a shallow clone of the variable DataFrame.
196    ///
197    /// # Notes
198    ///
199    /// This method returns a new `IMDataFrameElement` that shares the same underlying data with the original.
200    /// Modifications to the returned `IMDataFrameElement` will affect the original data.
201    pub fn var(&self) -> IMDataFrameElement {
202        self.var.clone()
203    }
204
205    /// Adds a new layer to the `layers` field.
206    ///
207    /// # Arguments
208    ///
209    /// * `name` - Name of the new layer.
210    /// * `data` - Data for the new layer.
211    ///
212    /// # Returns
213    ///
214    /// Returns `Ok(())` if the layer was successfully added, otherwise returns an `Err`.
215    ///
216    /// # Errors
217    ///
218    /// Returns an error if a layer with the same name already exists.
219    pub fn add_layer(&mut self, name: String, data: IMArrayElement) -> anyhow::Result<()> {
220        self.layers.add_array(name, data)
221    }
222
223    /// Retrieves a deep clone of a layer by name.
224    ///
225    /// # Arguments
226    ///
227    /// * `name` - Name of the layer to retrieve.
228    ///
229    /// # Returns
230    ///
231    /// Returns `Ok(IMArrayElement)` if the layer was found, otherwise returns an `Err`.
232    ///
233    /// # Errors
234    ///
235    /// Returns an error if the layer is not found.
236    pub fn get_layer(&self, name: &str) -> anyhow::Result<IMArrayElement> {
237        self.layers.get_array(name)
238    }
239
240    /// Retrieves a shallow clone of a layer by name.
241    ///
242    /// # Arguments
243    ///
244    /// * `name` - Name of the layer to retrieve.
245    ///
246    /// # Returns
247    ///
248    /// Returns `Ok(IMArrayElement)` if the layer was found, otherwise returns an `Err`.
249    ///
250    /// # Errors
251    ///
252    /// Returns an error if the layer is not found.
253    pub fn get_layer_shallow(&self, name: &str) -> anyhow::Result<IMArrayElement> {
254        self.layers.get_array_shallow(name)
255    }
256
257    /// Removes a layer by name and returns it.
258    ///
259    /// # Arguments
260    ///
261    /// * `name` - Name of the layer to remove.
262    ///
263    /// # Returns
264    ///
265    /// Returns `Ok(IMArrayElement)` with the removed layer if found, otherwise returns an `Err`.
266    ///
267    /// # Errors
268    ///
269    /// Returns an error if the layer is not found.
270    pub fn remove_layer(&mut self, name: &str) -> anyhow::Result<IMArrayElement> {
271        self.layers.remove_array(name)
272    }
273
274    /// Updates an existing layer with new data.
275    ///
276    /// # Arguments
277    ///
278    /// * `name` - Name of the layer to update.
279    /// * `data` - New data for the layer.
280    ///
281    /// # Returns
282    ///
283    /// Returns `Ok(())` if the layer was successfully updated, otherwise returns an `Err`.
284    ///
285    /// # Errors
286    ///
287    /// Returns an error if the layer is not found.
288    pub fn update_array(&mut self, name: &str, data: IMArrayElement) -> anyhow::Result<()> {
289        self.layers.update_array(name, data)
290    }
291
292    /// Returns a shallow clone of the observation multi-dimensional annotation.
293    ///
294    /// # Returns
295    ///
296    /// Returns an `IMAxisArrays` instance that shares the same underlying data with the original.
297    ///
298    /// # Notes
299    ///
300    /// This method performs a shallow clone, meaning the returned `IMAxisArrays` shares the same
301    /// Arc pointer to the RwLock containing the data. Any modifications made through this clone
302    /// will affect the original data in the `IMAnnData` instance.
303    pub fn obsm(&self) -> IMAxisArrays {
304        self.obsm.clone()
305    }
306
307    /// Returns a shallow clone of the observation pairwise annotation.
308    ///
309    /// # Returns
310    ///
311    /// Returns an `IMAxisArrays` instance that shares the same underlying data with the original.
312    ///
313    /// # Notes
314    ///
315    /// This method performs a shallow clone, meaning the returned `IMAxisArrays` shares the same
316    /// Arc pointer to the RwLock containing the data. Any modifications made through this clone
317    /// will affect the original data in the `IMAnnData` instance.
318    pub fn obsp(&self) -> IMAxisArrays {
319        self.obsp.clone()
320    }
321
322    /// Returns a shallow clone of the variable multi-dimensional annotation.
323    ///
324    /// # Returns
325    ///
326    /// Returns an `IMAxisArrays` instance that shares the same underlying data with the original.
327    ///
328    /// # Notes
329    ///
330    /// This method performs a shallow clone, meaning the returned `IMAxisArrays` shares the same
331    /// Arc pointer to the RwLock containing the data. Any modifications made through this clone
332    /// will affect the original data in the `IMAnnData` instance.
333    pub fn varm(&self) -> IMAxisArrays {
334        self.varm.clone()
335    }
336
337    /// Returns a shallow clone of the variable pairwise annotation.
338    ///
339    /// # Returns
340    ///
341    /// Returns an `IMAxisArrays` instance that shares the same underlying data with the original.
342    ///
343    /// # Notes
344    ///
345    /// This method performs a shallow clone, meaning the returned `IMAxisArrays` shares the same
346    /// Arc pointer to the RwLock containing the data. Any modifications made through this clone
347    /// will affect the original data in the `IMAnnData` instance.
348    pub fn varp(&self) -> IMAxisArrays {
349        self.varp.clone()
350    }
351
352    /// Returns a shallow clone of the unstructured annotation.
353    ///
354    /// # Returns
355    ///
356    /// Returns an `IMElementCollection` instance that shares the same underlying data with the original.
357    ///
358    /// # Notes
359    ///
360    /// This method performs a shallow clone, meaning the returned `IMElementCollection` shares the same
361    /// Arc pointer to the RwLock containing the data. Any modifications made through this clone
362    /// will affect the original data in the `IMAnnData` instance.
363    pub fn uns(&self) -> IMElementCollection {
364        self.uns.clone()
365    }
366
367    /// Returns a shallow clone of the layers of data.
368    ///
369    /// # Returns
370    ///
371    /// Returns an `IMAxisArrays` instance that shares the same underlying data with the original.
372    ///
373    /// # Notes
374    ///
375    /// This method performs a shallow clone, meaning the returned `IMAxisArrays` shares the same
376    /// Arc pointer to the RwLock containing the data. Any modifications made through this clone
377    /// will affect the original data in the `IMAnnData` instance.
378    pub fn layers(&self) -> IMAxisArrays {
379        self.layers.clone()
380    }
381    // !!!!! THIS IS VERY UNSAFE as it might allow for lock races, requires wrapping IMAnnData into a RwLock in order to prevent that, or transition to async data running of functions !!!!!
382    pub fn subset_inplace(&mut self, selection: &[&SelectInfoElem]) -> anyhow::Result<()> {
383        log!(Level::Debug, "Staring subsetting inplace");
384        if selection.len() != 2 {
385            return Err(anyhow::anyhow!("Invalid selection, only 2-dimensional selections are supported on the in-memory anndata object!"));
386        }
387
388        let obs_sel = selection[0];
389        let var_sel = selection[1];
390
391        // check if these changes are valid
392        log!(Level::Debug, "Performing boundchecks");
393        obs_sel.bound_check(self.n_obs())?;
394        var_sel.bound_check(self.n_vars())?;
395
396        log!(Level::Debug, "Subsetting X");
397        self.x.subset_inplace(selection)?;
398        log!(Level::Debug, "Subsetting obs");
399        self.obs.subset_inplace(obs_sel)?;
400        log!(Level::Debug, "Subsetting var");
401        self.var.subset_inplace(var_sel)?;
402        log!(Level::Debug, "Subsetting layers");
403        self.layers.subset_inplace(selection)?;
404        log!(Level::Debug, "Subsetting obsm");
405        self.obsm
406            .subset_inplace(vec![&obs_sel.clone(), &SelectInfoElem::full()].as_slice())?;
407        log!(Level::Debug, "Subsetting obsp");
408        self.obsp
409            .subset_inplace(vec![&obs_sel.clone(), &obs_sel.clone()].as_slice())?;
410        log!(Level::Debug, "Subsetting varm");
411        self.varm
412            .subset_inplace(vec![&var_sel.clone(), &SelectInfoElem::full()].as_slice())?;
413        log!(Level::Debug, "Subsetting varp");
414        self.varp
415            .subset_inplace(vec![&var_sel.clone(), &var_sel.clone()].as_slice())?;
416
417        self.n_obs = Dim::new(self.obs.get_data().height());
418        self.n_vars = Dim::new(self.var.get_data().height());
419
420        Ok(())
421    }
422
423    pub fn subset(&self, selection: &[&SelectInfoElem]) -> anyhow::Result<Self> {
424        if selection.len() != 2 {
425            return Err(anyhow::anyhow!("Invalid selection, only 2-dimensional selections are supported on the in-memory anndata object!"));
426        }
427
428        let obs_sel = selection[0];
429        let var_sel = selection[1];
430
431        // check if these changes are valid
432        obs_sel.bound_check(self.n_obs())?;
433        var_sel.bound_check(self.n_vars())?;
434
435        let obs = self.obs.subset(obs_sel)?;
436        let var = self.var.subset(var_sel)?;
437        let layers = self.layers.subset(selection)?;
438        let obsm = self
439            .obsm
440            .subset(vec![&obs_sel.clone(), &SelectInfoElem::full()].as_slice())?;
441        let obsp = self
442            .obsp
443            .subset(vec![&obs_sel.clone(), &obs_sel.clone()].as_slice())?;
444        let varm = self
445            .varm
446            .subset(vec![&var_sel.clone(), &SelectInfoElem::full()].as_slice())?;
447        let varp = self
448            .varp
449            .subset(vec![&var_sel.clone(), &var_sel.clone()].as_slice())?;
450
451        let x = self.x.subset(selection)?;
452
453        Ok(IMAnnData {
454            n_obs: Dim::new(obs.get_data().height()),
455            n_vars: Dim::new(var.get_data().height()),
456            x,
457            obs,
458            obsm,
459            obsp,
460            var,
461            varm,
462            varp,
463            uns: self.uns.clone(),
464            layers,
465        })
466    }
467
468    #[cfg(test)]
469    pub fn debug_info(&self) -> anyhow::Result<()> {
470        println!("AnnData Debug Info:");
471        println!("  Dimensions: {} obs x {} vars", self.n_obs(), self.n_vars());
472        
473        let x_shape = self.x().get_shape()?;
474        println!("  X matrix shape: {:?}", x_shape);
475        
476        let obs_df_shape = self.obs().get_data().shape();
477        let var_df_shape = self.var().get_data().shape();
478        println!("  obs DataFrame shape: {:?}", obs_df_shape);
479        println!("  var DataFrame shape: {:?}", var_df_shape);
480        
481        println!("  First 3 obs names: {:?}", &self.obs_names()[..self.n_obs().min(3)]);
482        println!("  First 3 var names: {:?}", &self.var_names()[..self.n_vars().min(3)]);
483        
484        Ok(())
485    }
486}
487
488use std::fmt;
489
490impl fmt::Display for IMAnnData {
491    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
492        writeln!(f, "IMAnnData Object")?;
493        writeln!(f, "-----------------")?;
494        writeln!(
495            f,
496            "Dimensions: {} observations x {} variables",
497            self.n_obs(),
498            self.n_vars()
499        )?;
500
501        // X matrix info
502        let x_shape = self.x().get_shape().map_err(|_| fmt::Error)?;
503        writeln!(
504            f,
505            "X: {:?} {}",
506            x_shape,
507            self.x().get_type().map_err(|_| fmt::Error)?
508        )?;
509
510        // Layers info
511        let layer_keys = self.layers().keys();
512        writeln!(
513            f,
514            "Layers: {} - {}",
515            layer_keys.len(),
516            layer_keys.join(", ")
517        )?;
518
519        // Obs and Var info
520        writeln!(
521            f,
522            "Obs DataFrame Shape: {:?}",
523            self.obs().get_data().shape()
524        )?;
525        writeln!(
526            f,
527            "Var DataFrame Shape: {:?}",
528            self.var().get_data().shape()
529        )?;
530
531        // Obsm, Obsp, Varm, Varp info
532        writeln!(f, "Obsm keys: {}", self.obsm().keys().join(", "))?;
533        writeln!(f, "Obsp keys: {}", self.obsp().keys().join(", "))?;
534        writeln!(f, "Varm keys: {}", self.varm().keys().join(", "))?;
535        writeln!(f, "Varp keys: {}", self.varp().keys().join(", "))?;
536
537        // Uns info
538
539        Ok(())
540    }
541}
542
543impl DeepClone for IMAnnData {
544    fn deep_clone(&self) -> Self {
545        Self {
546            n_obs: self.n_obs.clone(),
547            n_vars: self.n_vars.clone(),
548            x: self.x.deep_clone(),
549            obs: self.obs.deep_clone(),
550            obsm: self.obsm.deep_clone(),
551            obsp: self.obsp.deep_clone(),
552            var: self.var.deep_clone(),
553            varm: self.varm.deep_clone(),
554            varp: self.varp.deep_clone(),
555            uns: self.uns.deep_clone(),
556            layers: self.layers.deep_clone(),
557        }
558    }
559}