labeledarray/
lib.rs

1use ndarray::{ArrayD, Axis, SliceInfoElem};
2use std::collections::HashMap;
3
/// Selection criterion for one dimension, used by the `sel` method.
///
/// The borrowed string slices are coordinate labels; they are looked up in the
/// coordinate list of the dimension the selector is attached to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Selector<'a> {
    /// Select a single label
    Label(&'a str),
    /// Select several labels, in the order given
    Slice(Vec<&'a str>),
}
11
12/// A high-level, labeled multi-dimensional array structure for geospatial data.
13///
14/// This structure provides labeled dimensions, connecting human-readable dimension
15/// names (like "band", "time", "y", "x") to the underlying numerical data store.
16#[derive(Debug, Clone)]
17pub struct LabeledArray<T> {
18    /// The underlying n-dimensional array data
19    data: ArrayD<T>,
20    /// Dimension names in order (e.g., ["time", "band", "y", "x"])
21    dims: Vec<String>,
22    /// Coordinates for each dimension (e.g., {"time": [0, 1, 2], "band": ["red", "green", "blue"]})
23    coords: HashMap<String, Vec<String>>,
24}
25
26impl<T> LabeledArray<T>
27where
28    T: Clone,
29{
30    /// Create a new LabeledArray from raw data and dimension names
31    ///
32    /// # Arguments
33    ///
34    /// * `data` - The underlying n-dimensional array
35    /// * `dims` - Names for each dimension (must match the number of dimensions in data)
36    ///
37    /// # Panics
38    ///
39    /// Panics if the number of dimension names doesn't match the number of dimensions in the data
40    ///
41    /// # Example
42    ///
43    /// ```
44    /// use ndarray::ArrayD;
45    /// use labeledarray::LabeledArray;
46    ///
47    /// let data = ArrayD::from_shape_vec(vec![2, 3], vec![1, 2, 3, 4, 5, 6]).unwrap();
48    /// let dims = vec!["y".to_string(), "x".to_string()];
49    /// let array = LabeledArray::new(data, dims);
50    /// ```
51    pub fn new(data: ArrayD<T>, dims: Vec<String>) -> Self {
52        assert_eq!(
53            data.ndim(),
54            dims.len(),
55            "Number of dimension names must match the number of dimensions in data"
56        );
57
58        Self {
59            data,
60            dims,
61            coords: HashMap::new(),
62        }
63    }
64
65    /// Create a new LabeledArray with coordinates
66    ///
67    /// # Arguments
68    ///
69    /// * `data` - The underlying n-dimensional array
70    /// * `dims` - Names for each dimension
71    /// * `coords` - Coordinate labels for each dimension
72    ///
73    /// # Panics
74    ///
75    /// Panics if the number of dimension names doesn't match the number of dimensions in data,
76    /// or if coordinate lengths don't match their corresponding dimension sizes
77    ///
78    /// # Example
79    ///
80    /// ```
81    /// use ndarray::ArrayD;
82    /// use labeledarray::LabeledArray;
83    /// use std::collections::HashMap;
84    ///
85    /// let data = ArrayD::from_shape_vec(vec![2, 3], vec![1, 2, 3, 4, 5, 6]).unwrap();
86    /// let dims = vec!["y".to_string(), "x".to_string()];
87    /// let mut coords = HashMap::new();
88    /// coords.insert("y".to_string(), vec!["0".to_string(), "10".to_string()]);
89    /// coords.insert("x".to_string(), vec!["0".to_string(), "10".to_string(), "20".to_string()]);
90    /// let array = LabeledArray::new_with_coords(data, dims, coords);
91    /// ```
92    pub fn new_with_coords(
93        data: ArrayD<T>,
94        dims: Vec<String>,
95        coords: HashMap<String, Vec<String>>,
96    ) -> Self {
97        assert_eq!(
98            data.ndim(),
99            dims.len(),
100            "Number of dimension names must match the number of dimensions in data"
101        );
102
103        // Validate coordinate lengths match dimension sizes
104        for (i, dim_name) in dims.iter().enumerate() {
105            if let Some(coord) = coords.get(dim_name) {
106                assert_eq!(
107                    coord.len(),
108                    data.shape()[i],
109                    "Coordinate length for dimension '{}' must match dimension size",
110                    dim_name
111                );
112            }
113        }
114
115        Self { data, dims, coords }
116    }
117
118    /// Get the dimension names
119    pub fn dims(&self) -> &[String] {
120        &self.dims
121    }
122
123    /// Get the shape of the array
124    pub fn shape(&self) -> &[usize] {
125        self.data.shape()
126    }
127
128    /// Get the number of dimensions
129    pub fn ndim(&self) -> usize {
130        self.data.ndim()
131    }
132
133    /// Get a reference to the underlying data
134    pub fn data(&self) -> &ArrayD<T> {
135        &self.data
136    }
137
138    /// Get a mutable reference to the underlying data
139    pub fn data_mut(&mut self) -> &mut ArrayD<T> {
140        &mut self.data
141    }
142
143    /// Get coordinates for a specific dimension
144    pub fn coords(&self, dim: &str) -> Option<&Vec<String>> {
145        self.coords.get(dim)
146    }
147
148    /// Get all coordinates
149    pub fn all_coords(&self) -> &HashMap<String, Vec<String>> {
150        &self.coords
151    }
152
153    /// Add or update coordinates for a dimension
154    ///
155    /// # Panics
156    ///
157    /// Panics if the dimension doesn't exist or if the coordinate length
158    /// doesn't match the dimension size
159    pub fn set_coords(&mut self, dim: &str, coords: Vec<String>) {
160        let dim_index: usize = self
161            .dims
162            .iter()
163            .position(|d| d == dim)
164            .expect("Dimension not found");
165
166        assert_eq!(
167            coords.len(),
168            self.data.shape()[dim_index],
169            "Coordinate length must match dimension size"
170        );
171
172        self.coords.insert(dim.to_string(), coords);
173    }
174
175    /// Get the index of a dimension by name
176    pub fn dim_index(&self, dim: &str) -> Option<usize> {
177        self.dims.iter().position(|d| d == dim)
178    }
179
180    /// Select data along a dimension by coordinate label
181    ///
182    /// Returns None if the dimension or coordinate label is not found
183    pub fn select_by_label(&self, dim: &str, label: &str) -> Option<usize> {
184        let coords: &Vec<String> = self.coords.get(dim)?;
185        coords.iter().position(|c| c == label)
186    }
187
188    /// Select a subset of the array using dimension and coordinate labels.
189    ///
190    /// # Arguments
191    ///
192    /// * `selectors` - A HashMap where keys are dimension names and values
193    ///   are `Selector` enums (`Selector::Label` or `Selector::Slice`).
194    ///
195    /// # Returns
196    ///
197    /// A new `LabeledArray` containing the sliced data.
198    ///
199    /// # Panics
200    ///
201    /// Panics if a specified dimension does not exist or if a label is not found.
202    pub fn sel(&self, selectors: HashMap<&str, Selector>) -> Self {
203        let mut data = self.data.clone();
204        let mut dims = self.dims.clone();
205        let mut coords = self.coords.clone();
206
207        // Process slice selectors first
208        for (dim_name, selector) in &selectors {
209            if let Selector::Slice(labels) = selector {
210                let dim_index = dims
211                    .iter()
212                    .position(|d| d == *dim_name)
213                    .expect("Dimension not found");
214                let current_coords = coords
215                    .get(*dim_name)
216                    .expect("Coordinates not found for dimension");
217                let indices: Vec<usize> = labels
218                    .iter()
219                    .map(|label| {
220                        current_coords
221                            .iter()
222                            .position(|c| c == *label)
223                            .expect("Label not found")
224                    })
225                    .collect();
226
227                data = data.select(Axis(dim_index), &indices).to_owned();
228
229                let new_dim_coords: Vec<String> = labels.iter().map(|l| l.to_string()).collect();
230                coords.insert(dim_name.to_string(), new_dim_coords);
231            }
232        }
233
234        // Process label selectors
235        let mut slice_info = Vec::new();
236        let mut dims_to_remove = Vec::new();
237        for dim_name in dims.iter() {
238            if let Some(Selector::Label(label)) = selectors.get(dim_name.as_str()) {
239                let current_coords = coords
240                    .get(dim_name)
241                    .expect("Coordinates not found for dimension");
242                let index = current_coords
243                    .iter()
244                    .position(|c| c == *label)
245                    .expect("Label not found");
246                slice_info.push(SliceInfoElem::Index(index as isize));
247                dims_to_remove.push(dim_name.clone());
248            } else {
249                slice_info.push(SliceInfoElem::Slice {
250                    start: 0,
251                    end: None,
252                    step: 1,
253                });
254            }
255        }
256
257        let sliced_data = data.slice(slice_info.as_slice()).to_owned();
258
259        dims.retain(|d| !dims_to_remove.contains(d));
260        coords.retain(|k, _| !dims_to_remove.contains(k));
261
262        Self {
263            data: sliced_data,
264            dims,
265            coords,
266        }
267    }
268
269    /// Create a new LabeledArray with updated coordinates
270    ///
271    /// Returns an error if the coordinate lengths don't match dimension sizes
272    pub fn with_coords(self, coords: HashMap<String, Vec<f64>>) -> Result<Self, String> {
273        let mut new_coords: HashMap<String, Vec<String>> = HashMap::new();
274
275        for (dim, coord_vec) in coords {
276            if let Some(dim_index) = self.dim_index(&dim) {
277                if coord_vec.len() != self.shape()[dim_index] {
278                    return Err(format!(
279                        "Coordinate length for dimension '{}' must match dimension size",
280                        dim
281                    ));
282                }
283                // Convert f64 coordinates to strings
284                let coord_strings: Vec<String> = coord_vec.iter().map(|c| c.to_string()).collect();
285                new_coords.insert(dim, coord_strings);
286            } else {
287                return Err(format!("Dimension '{}' not found", dim));
288            }
289        }
290
291        Ok(LabeledArray {
292            data: self.data,
293            dims: self.dims,
294            coords: new_coords,
295        })
296    }
297}
298
299impl<T> LabeledArray<T>
300where
301    T: Clone + std::fmt::Display,
302{
303    /// Pretty print the array with dimension information
304    pub fn info(&self) -> String {
305        let mut info = String::new();
306        info.push_str(&format!("LabeledArray<{}>\n", std::any::type_name::<T>()));
307        info.push_str(&format!("Dimensions: {:?}\n", self.dims));
308        info.push_str(&format!("Shape: {:?}\n", self.shape()));
309
310        if !self.coords.is_empty() {
311            info.push_str("Coordinates:\n");
312            for (dim, coords) in &self.coords {
313                info.push_str(&format!("  {}: {} labels\n", dim, coords.len()));
314            }
315        }
316
317        info
318    }
319}
320
#[cfg(test)]
mod tests {
    use super::*;
    // Keep only focused unit tests in this file. Integration-style tests live under `tests/`.

    /// Builds an `i32` array of the given shape and wraps it with the given dimension names.
    fn labeled(shape: &[usize], values: Vec<i32>, dims: &[&str]) -> LabeledArray<i32> {
        let data: ArrayD<i32> = ArrayD::from_shape_vec(shape.to_vec(), values).unwrap();
        let names: Vec<String> = dims.iter().map(|d| d.to_string()).collect();
        LabeledArray::new(data, names)
    }

    #[test]
    fn test_new_labeled_array() {
        let _array = labeled(&[2, 3], vec![1, 2, 3, 4, 5, 6], &["y", "x"]);
    }

    #[test]
    #[should_panic(
        expected = "Number of dimension names must match the number of dimensions in data"
    )]
    fn test_new_labeled_array_dimension_mismatch() {
        // Only 1 dimension name for 2D data
        let _array = labeled(&[2, 3], vec![1, 2, 3, 4, 5, 6], &["y"]);
    }

    #[test]
    fn test_dim_index_and_missing() {
        let array = labeled(&[2, 3, 4], vec![0; 24], &["time", "y", "x"]);

        assert_eq!(array.dim_index("time"), Some(0));
        assert_eq!(array.dim_index("band"), None);
    }

    #[test]
    fn test_data_mut_roundtrip() {
        let mut array = labeled(&[2, 2], vec![1, 2, 3, 4], &["y", "x"]);

        // write through the mutable accessor, then read back through the shared one
        array.data_mut().as_slice_mut().unwrap()[0] = 99;

        assert_eq!(array.data().as_slice().unwrap()[0], 99);
    }

    #[test]
    #[should_panic(expected = "Dimension not found")]
    fn test_set_coords_nonexistent_dimension_panics() {
        let mut array = labeled(&[2], vec![1, 2], &["a"]);

        // "b" is not a dimension of this array, so this must panic
        array.set_coords("b", vec!["x".to_string(), "y".to_string()]);
    }

    #[test]
    fn test_dims_and_all_coords_empty() {
        let array = labeled(&[2, 2], vec![1, 2, 3, 4], &["y", "x"]);
        let expected: Vec<String> = vec!["y".to_string(), "x".to_string()];

        // dims() should return the names in the order they were given
        assert_eq!(array.dims(), expected.as_slice());
        // a freshly built array carries no coordinates
        assert!(array.all_coords().is_empty());
    }

    #[test]
    fn test_with_coords_success_and_all_coords() {
        let numeric_coords: HashMap<String, Vec<f64>> = vec![
            ("y".to_string(), vec![0.0, 10.0]),
            ("x".to_string(), vec![0.0, 10.0, 20.0]),
        ]
        .into_iter()
        .collect();

        let result = labeled(&[2, 3], vec![1, 2, 3, 4, 5, 6], &["y", "x"])
            .with_coords(numeric_coords)
            .expect("with_coords should succeed");

        // both dimensions should now have coordinates of the matching length
        assert_eq!(result.coords("y").unwrap().len(), 2);
        assert_eq!(result.coords("x").unwrap().len(), 3);
    }

    #[test]
    fn test_with_coords_wrong_length_returns_err() {
        let numeric_coords: HashMap<String, Vec<f64>> =
            vec![("y".to_string(), vec![0.0])] // wrong length: "y" has size 2
                .into_iter()
                .collect();

        let err: String = labeled(&[2, 3], vec![1, 2, 3, 4, 5, 6], &["y", "x"])
            .with_coords(numeric_coords)
            .unwrap_err();
        assert!(err.contains("must match dimension size"));
    }

    #[test]
    fn test_set_coords_success() {
        let mut array = labeled(&[2], vec![1, 2], &["a"]);
        let labels: Vec<String> = vec!["x".to_string(), "y".to_string()];

        array.set_coords("a", labels.clone());
        assert_eq!(array.coords("a"), Some(&labels));
        // the stored labels must be searchable by select_by_label
        assert_eq!(array.select_by_label("a", "y"), Some(1));
    }

    #[test]
    fn test_with_coords_dimension_not_found_returns_err() {
        let numeric_coords: HashMap<String, Vec<f64>> =
            vec![("z".to_string(), vec![0.0, 1.0])] // 'z' is not a dimension
                .into_iter()
                .collect();

        let err = labeled(&[2], vec![1, 2], &["a"])
            .with_coords(numeric_coords)
            .unwrap_err();
        assert!(err.contains("not found"));
    }

    #[test]
    fn test_info_no_coords() {
        let info = labeled(&[1, 2], vec![1, 2], &["y", "x"]).info();

        assert!(info.contains("LabeledArray"));
        assert!(info.contains("Dimensions"));
        assert!(info.contains("Shape"));
        // with no coords set, the 'Coordinates:' header must be absent
        assert!(!info.contains("Coordinates:"));
    }
}