anndata_memory/ad/mod.rs
1use anndata::{
2 container::{Axis, Dim},
3 data::{DataFrameIndex, SelectInfoElem},
4 ArrayData, HasShape,
5};
6use helpers::IMAxisArrays;
7use log::{log, Level};
8use polars::{frame::DataFrame, prelude::Column};
9
10use crate::{base::DeepClone, IMArrayElement, IMDataFrameElement, IMElementCollection};
11
12pub(crate) mod helpers;
13
14pub struct IMAnnData {
15 /// Number of observations (rows).
16 pub(crate) n_obs: Dim,
17 /// Number of variables (columns).
18 pub(crate) n_vars: Dim,
19 /// Data matrix.
20 x: IMArrayElement,
21 /// Observations metadata.
22 obs: IMDataFrameElement,
23 /// Observation multi-dimensional annotation.
24 obsm: IMAxisArrays,
25 /// Observation pairwise annotation.
26 obsp: IMAxisArrays,
27 /// Variables metadata.
28 var: IMDataFrameElement,
29 /// Variable multi-dimensional annotation.
30 varm: IMAxisArrays,
31 /// Variable pairwise annotation.
32 varp: IMAxisArrays,
33 /// Unstructured annotation.
34 uns: IMElementCollection,
35 /// Layers of data.
36 layers: IMAxisArrays,
37}
38
39impl IMAnnData {
40 /// Creates a new `IMAnnData` instance.
41 ///
42 /// # Arguments
43 ///
44 /// * `x` - Main data matrix.
45 /// * `obs` - Observations metadata.
46 /// * `var` - Variables metadata.
47 ///
48 /// # Returns
49 ///
50 /// Returns `Ok(IMAnnData)` if dimensions match, otherwise returns an `Err`.
51 ///
52 /// # Errors
53 ///
54 /// Returns an error if dimensions mismatch between `x`, `obs`, and `var`.
55 pub fn new(
56 x: IMArrayElement,
57 obs: IMDataFrameElement,
58 var: IMDataFrameElement,
59 ) -> anyhow::Result<Self> {
60 let n_obs = Dim::new(obs.get_data().height());
61 let n_vars = Dim::new(var.get_data().height());
62 // Validate dimensions
63 let x_shape = x.get_shape()?;
64 if x_shape[0] != n_obs.get() || x_shape[1] != n_vars.get() {
65 return Err(anyhow::anyhow!("Dimensions mismatch"));
66 }
67 Ok(Self {
68 n_obs: n_obs.clone(),
69 n_vars: n_vars.clone(),
70 x,
71 obs,
72 var,
73 obsm: IMAxisArrays::new(Axis::Row, n_obs.clone(), None),
74 obsp: IMAxisArrays::new(Axis::Pairwise, n_obs.clone(), None),
75 varm: IMAxisArrays::new(Axis::Row, n_vars.clone(), None),
76 varp: IMAxisArrays::new(Axis::Pairwise, n_vars.clone(), None),
77 uns: IMElementCollection::new_empty(),
78 layers: IMAxisArrays::new(Axis::RowColumn, n_obs.clone(), Some(n_vars.clone())),
79 })
80 }
81
82 /// Creates a new basic `IMAnnData` instance from a sparse matrix and index names.
83 ///
84 /// # Arguments
85 ///
86 /// * `matrix` - A sparse matrix (CsrArray) containing the main data.
87 /// * `obs_names` - Names for the observations (rows).
88 /// * `var_names` - Names for the variables (columns).
89 ///
90 /// # Returns
91 ///
92 /// Returns `Result<IMAnnData>` if successful, otherwise returns an `Err`.
93 ///
94 /// # Errors
95 ///
96 /// Returns an error if there's a mismatch in dimensions or if DataFrame creation fails.
97 pub fn new_basic(
98 matrix: ArrayData,
99 obs_names: Vec<String>,
100 var_names: Vec<String>,
101 ) -> anyhow::Result<Self> {
102 let s = matrix.shape();
103 let n_obs = s[0];
104 let n_vars = s[1];
105
106 // Validate dimensions
107 if n_obs != obs_names.len() || n_vars != var_names.len() {
108 return Err(anyhow::anyhow!(
109 "Dimensions mismatch between matrix and index names"
110 ));
111 }
112
113 // Create basic obs DataFrame and IMDataFrameElement
114 let obs_df = DataFrame::new(vec![Column::new("index".into(), &obs_names)])?;
115 let obs_index: DataFrameIndex = obs_names.into();
116 let obs = IMDataFrameElement::new(obs_df, obs_index);
117
118 // Create basic var DataFrame and IMDataFrameElement
119 let var_df = DataFrame::new(vec![Column::new("index".into(), &var_names)])?;
120 let var_index: DataFrameIndex = var_names.into();
121 let var = IMDataFrameElement::new(var_df, var_index);
122
123 // Create the IMAnnData object
124 IMAnnData::new(IMArrayElement::new(matrix), obs, var)
125 }
126
127 pub fn new_extended(
128 matrix: ArrayData,
129 obs_names: Vec<String>,
130 var_names: Vec<String>,
131 obs_df: DataFrame,
132 var_df: DataFrame,
133 ) -> anyhow::Result<Self> {
134 let s = matrix.shape();
135 let n_obs = s[0];
136 let n_vars = s[1];
137
138 // Validate dimensions
139 if n_obs != obs_names.len() || n_vars != var_names.len() {
140 return Err(anyhow::anyhow!(
141 "Dimensions mismatch between matrix and index names"
142 ));
143 }
144
145 // Create basic obs DataFrame and IMDataFrameElement
146 let obs_index: DataFrameIndex = obs_names.into();
147 let obs = IMDataFrameElement::new(obs_df, obs_index);
148
149 // Create basic var DataFrame and IMDataFrameElement
150 let var_index: DataFrameIndex = var_names.into();
151 let var = IMDataFrameElement::new(var_df, var_index);
152
153 // Create the IMAnnData object
154 IMAnnData::new(IMArrayElement::new(matrix), obs, var)
155 }
156
157 /// Returns the number of observations.
158 pub fn n_obs(&self) -> usize {
159 self.n_obs.get()
160 }
161
162 /// Returns the number of variables.
163 pub fn n_vars(&self) -> usize {
164 self.n_vars.get()
165 }
166
167 pub fn obs_names(&self) -> Vec<String> {
168 self.obs.get_index().into_vec()
169 }
170
171 pub fn var_names(&self) -> Vec<String> {
172 self.var.get_index().into_vec()
173 }
174
175 /// Returns a shallow clone of the main data matrix.
176 ///
177 /// # Notes
178 ///
179 /// This method returns a new `IMArrayElement` that shares the same underlying data with the original.
180 /// Modifications to the returned `IMArrayElement` will affect the original data.
181 pub fn x(&self) -> IMArrayElement {
182 self.x.clone()
183 }
184
185 /// Returns a shallow clone of the observations metadata.
186 ///
187 /// # Notes
188 ///
189 /// This method returns a new `IMDataFrameElement` that shares the same underlying data with the original.
190 /// Modifications to the returned `IMDataFrameElement` will affect the original data.
191 pub fn obs(&self) -> IMDataFrameElement {
192 self.obs.clone()
193 }
194
195 /// Returns a shallow clone of the variable DataFrame.
196 ///
197 /// # Notes
198 ///
199 /// This method returns a new `IMDataFrameElement` that shares the same underlying data with the original.
200 /// Modifications to the returned `IMDataFrameElement` will affect the original data.
201 pub fn var(&self) -> IMDataFrameElement {
202 self.var.clone()
203 }
204
205 /// Adds a new layer to the `layers` field.
206 ///
207 /// # Arguments
208 ///
209 /// * `name` - Name of the new layer.
210 /// * `data` - Data for the new layer.
211 ///
212 /// # Returns
213 ///
214 /// Returns `Ok(())` if the layer was successfully added, otherwise returns an `Err`.
215 ///
216 /// # Errors
217 ///
218 /// Returns an error if a layer with the same name already exists.
219 pub fn add_layer(&mut self, name: String, data: IMArrayElement) -> anyhow::Result<()> {
220 self.layers.add_array(name, data)
221 }
222
223 /// Retrieves a deep clone of a layer by name.
224 ///
225 /// # Arguments
226 ///
227 /// * `name` - Name of the layer to retrieve.
228 ///
229 /// # Returns
230 ///
231 /// Returns `Ok(IMArrayElement)` if the layer was found, otherwise returns an `Err`.
232 ///
233 /// # Errors
234 ///
235 /// Returns an error if the layer is not found.
236 pub fn get_layer(&self, name: &str) -> anyhow::Result<IMArrayElement> {
237 self.layers.get_array(name)
238 }
239
240 /// Retrieves a shallow clone of a layer by name.
241 ///
242 /// # Arguments
243 ///
244 /// * `name` - Name of the layer to retrieve.
245 ///
246 /// # Returns
247 ///
248 /// Returns `Ok(IMArrayElement)` if the layer was found, otherwise returns an `Err`.
249 ///
250 /// # Errors
251 ///
252 /// Returns an error if the layer is not found.
253 pub fn get_layer_shallow(&self, name: &str) -> anyhow::Result<IMArrayElement> {
254 self.layers.get_array_shallow(name)
255 }
256
257 /// Removes a layer by name and returns it.
258 ///
259 /// # Arguments
260 ///
261 /// * `name` - Name of the layer to remove.
262 ///
263 /// # Returns
264 ///
265 /// Returns `Ok(IMArrayElement)` with the removed layer if found, otherwise returns an `Err`.
266 ///
267 /// # Errors
268 ///
269 /// Returns an error if the layer is not found.
270 pub fn remove_layer(&mut self, name: &str) -> anyhow::Result<IMArrayElement> {
271 self.layers.remove_array(name)
272 }
273
274 /// Updates an existing layer with new data.
275 ///
276 /// # Arguments
277 ///
278 /// * `name` - Name of the layer to update.
279 /// * `data` - New data for the layer.
280 ///
281 /// # Returns
282 ///
283 /// Returns `Ok(())` if the layer was successfully updated, otherwise returns an `Err`.
284 ///
285 /// # Errors
286 ///
287 /// Returns an error if the layer is not found.
288 pub fn update_array(&mut self, name: &str, data: IMArrayElement) -> anyhow::Result<()> {
289 self.layers.update_array(name, data)
290 }
291
292 /// Returns a shallow clone of the observation multi-dimensional annotation.
293 ///
294 /// # Returns
295 ///
296 /// Returns an `IMAxisArrays` instance that shares the same underlying data with the original.
297 ///
298 /// # Notes
299 ///
300 /// This method performs a shallow clone, meaning the returned `IMAxisArrays` shares the same
301 /// Arc pointer to the RwLock containing the data. Any modifications made through this clone
302 /// will affect the original data in the `IMAnnData` instance.
303 pub fn obsm(&self) -> IMAxisArrays {
304 self.obsm.clone()
305 }
306
307 /// Returns a shallow clone of the observation pairwise annotation.
308 ///
309 /// # Returns
310 ///
311 /// Returns an `IMAxisArrays` instance that shares the same underlying data with the original.
312 ///
313 /// # Notes
314 ///
315 /// This method performs a shallow clone, meaning the returned `IMAxisArrays` shares the same
316 /// Arc pointer to the RwLock containing the data. Any modifications made through this clone
317 /// will affect the original data in the `IMAnnData` instance.
318 pub fn obsp(&self) -> IMAxisArrays {
319 self.obsp.clone()
320 }
321
322 /// Returns a shallow clone of the variable multi-dimensional annotation.
323 ///
324 /// # Returns
325 ///
326 /// Returns an `IMAxisArrays` instance that shares the same underlying data with the original.
327 ///
328 /// # Notes
329 ///
330 /// This method performs a shallow clone, meaning the returned `IMAxisArrays` shares the same
331 /// Arc pointer to the RwLock containing the data. Any modifications made through this clone
332 /// will affect the original data in the `IMAnnData` instance.
333 pub fn varm(&self) -> IMAxisArrays {
334 self.varm.clone()
335 }
336
337 /// Returns a shallow clone of the variable pairwise annotation.
338 ///
339 /// # Returns
340 ///
341 /// Returns an `IMAxisArrays` instance that shares the same underlying data with the original.
342 ///
343 /// # Notes
344 ///
345 /// This method performs a shallow clone, meaning the returned `IMAxisArrays` shares the same
346 /// Arc pointer to the RwLock containing the data. Any modifications made through this clone
347 /// will affect the original data in the `IMAnnData` instance.
348 pub fn varp(&self) -> IMAxisArrays {
349 self.varp.clone()
350 }
351
352 /// Returns a shallow clone of the unstructured annotation.
353 ///
354 /// # Returns
355 ///
356 /// Returns an `IMElementCollection` instance that shares the same underlying data with the original.
357 ///
358 /// # Notes
359 ///
360 /// This method performs a shallow clone, meaning the returned `IMElementCollection` shares the same
361 /// Arc pointer to the RwLock containing the data. Any modifications made through this clone
362 /// will affect the original data in the `IMAnnData` instance.
363 pub fn uns(&self) -> IMElementCollection {
364 self.uns.clone()
365 }
366
367 /// Returns a shallow clone of the layers of data.
368 ///
369 /// # Returns
370 ///
371 /// Returns an `IMAxisArrays` instance that shares the same underlying data with the original.
372 ///
373 /// # Notes
374 ///
375 /// This method performs a shallow clone, meaning the returned `IMAxisArrays` shares the same
376 /// Arc pointer to the RwLock containing the data. Any modifications made through this clone
377 /// will affect the original data in the `IMAnnData` instance.
378 pub fn layers(&self) -> IMAxisArrays {
379 self.layers.clone()
380 }
381 // !!!!! THIS IS VERY UNSAFE as it might allow for lock races, requires wrapping IMAnnData into a RwLock in order to prevent that, or transition to async data running of functions !!!!!
382 pub fn subset_inplace(&mut self, selection: &[&SelectInfoElem]) -> anyhow::Result<()> {
383 log!(Level::Debug, "Staring subsetting inplace");
384 if selection.len() != 2 {
385 return Err(anyhow::anyhow!("Invalid selection, only 2-dimensional selections are supported on the in-memory anndata object!"));
386 }
387
388 let obs_sel = selection[0];
389 let var_sel = selection[1];
390
391 // check if these changes are valid
392 log!(Level::Debug, "Performing boundchecks");
393 obs_sel.bound_check(self.n_obs())?;
394 var_sel.bound_check(self.n_vars())?;
395
396 log!(Level::Debug, "Subsetting X");
397 self.x.subset_inplace(selection)?;
398 log!(Level::Debug, "Subsetting obs");
399 self.obs.subset_inplace(obs_sel)?;
400 log!(Level::Debug, "Subsetting var");
401 self.var.subset_inplace(var_sel)?;
402 log!(Level::Debug, "Subsetting layers");
403 self.layers.subset_inplace(selection)?;
404 log!(Level::Debug, "Subsetting obsm");
405 self.obsm
406 .subset_inplace(vec![&obs_sel.clone(), &SelectInfoElem::full()].as_slice())?;
407 log!(Level::Debug, "Subsetting obsp");
408 self.obsp
409 .subset_inplace(vec![&obs_sel.clone(), &obs_sel.clone()].as_slice())?;
410 log!(Level::Debug, "Subsetting varm");
411 self.varm
412 .subset_inplace(vec![&var_sel.clone(), &SelectInfoElem::full()].as_slice())?;
413 log!(Level::Debug, "Subsetting varp");
414 self.varp
415 .subset_inplace(vec![&var_sel.clone(), &var_sel.clone()].as_slice())?;
416
417 self.n_obs = Dim::new(self.obs.get_data().height());
418 self.n_vars = Dim::new(self.var.get_data().height());
419
420 Ok(())
421 }
422
423 pub fn subset(&self, selection: &[&SelectInfoElem]) -> anyhow::Result<Self> {
424 if selection.len() != 2 {
425 return Err(anyhow::anyhow!("Invalid selection, only 2-dimensional selections are supported on the in-memory anndata object!"));
426 }
427
428 let obs_sel = selection[0];
429 let var_sel = selection[1];
430
431 // check if these changes are valid
432 obs_sel.bound_check(self.n_obs())?;
433 var_sel.bound_check(self.n_vars())?;
434
435 let obs = self.obs.subset(obs_sel)?;
436 let var = self.var.subset(var_sel)?;
437 let layers = self.layers.subset(selection)?;
438 let obsm = self
439 .obsm
440 .subset(vec![&obs_sel.clone(), &SelectInfoElem::full()].as_slice())?;
441 let obsp = self
442 .obsp
443 .subset(vec![&obs_sel.clone(), &obs_sel.clone()].as_slice())?;
444 let varm = self
445 .varm
446 .subset(vec![&var_sel.clone(), &SelectInfoElem::full()].as_slice())?;
447 let varp = self
448 .varp
449 .subset(vec![&var_sel.clone(), &var_sel.clone()].as_slice())?;
450
451 let x = self.x.subset(selection)?;
452
453 Ok(IMAnnData {
454 n_obs: Dim::new(obs.get_data().height()),
455 n_vars: Dim::new(var.get_data().height()),
456 x,
457 obs,
458 obsm,
459 obsp,
460 var,
461 varm,
462 varp,
463 uns: self.uns.clone(),
464 layers,
465 })
466 }
467
468 #[cfg(test)]
469 pub fn debug_info(&self) -> anyhow::Result<()> {
470 println!("AnnData Debug Info:");
471 println!(" Dimensions: {} obs x {} vars", self.n_obs(), self.n_vars());
472
473 let x_shape = self.x().get_shape()?;
474 println!(" X matrix shape: {:?}", x_shape);
475
476 let obs_df_shape = self.obs().get_data().shape();
477 let var_df_shape = self.var().get_data().shape();
478 println!(" obs DataFrame shape: {:?}", obs_df_shape);
479 println!(" var DataFrame shape: {:?}", var_df_shape);
480
481 println!(" First 3 obs names: {:?}", &self.obs_names()[..self.n_obs().min(3)]);
482 println!(" First 3 var names: {:?}", &self.var_names()[..self.n_vars().min(3)]);
483
484 Ok(())
485 }
486}
487
488use std::fmt;
489
490impl fmt::Display for IMAnnData {
491 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
492 writeln!(f, "IMAnnData Object")?;
493 writeln!(f, "-----------------")?;
494 writeln!(
495 f,
496 "Dimensions: {} observations x {} variables",
497 self.n_obs(),
498 self.n_vars()
499 )?;
500
501 // X matrix info
502 let x_shape = self.x().get_shape().map_err(|_| fmt::Error)?;
503 writeln!(
504 f,
505 "X: {:?} {}",
506 x_shape,
507 self.x().get_type().map_err(|_| fmt::Error)?
508 )?;
509
510 // Layers info
511 let layer_keys = self.layers().keys();
512 writeln!(
513 f,
514 "Layers: {} - {}",
515 layer_keys.len(),
516 layer_keys.join(", ")
517 )?;
518
519 // Obs and Var info
520 writeln!(
521 f,
522 "Obs DataFrame Shape: {:?}",
523 self.obs().get_data().shape()
524 )?;
525 writeln!(
526 f,
527 "Var DataFrame Shape: {:?}",
528 self.var().get_data().shape()
529 )?;
530
531 // Obsm, Obsp, Varm, Varp info
532 writeln!(f, "Obsm keys: {}", self.obsm().keys().join(", "))?;
533 writeln!(f, "Obsp keys: {}", self.obsp().keys().join(", "))?;
534 writeln!(f, "Varm keys: {}", self.varm().keys().join(", "))?;
535 writeln!(f, "Varp keys: {}", self.varp().keys().join(", "))?;
536
537 // Uns info
538
539 Ok(())
540 }
541}
542
543impl DeepClone for IMAnnData {
544 fn deep_clone(&self) -> Self {
545 Self {
546 n_obs: self.n_obs.clone(),
547 n_vars: self.n_vars.clone(),
548 x: self.x.deep_clone(),
549 obs: self.obs.deep_clone(),
550 obsm: self.obsm.deep_clone(),
551 obsp: self.obsp.deep_clone(),
552 var: self.var.deep_clone(),
553 varm: self.varm.deep_clone(),
554 varp: self.varp.deep_clone(),
555 uns: self.uns.deep_clone(),
556 layers: self.layers.deep_clone(),
557 }
558 }
559}