Skip to main content

oxigdal_netcdf/
variable.rs

//! NetCDF variable types and utilities.
//!
//! Variables store the actual data in NetCDF files. They have dimensions,
//! attributes, and a data type.
5
6use serde::{Deserialize, Serialize};
7
8use crate::attribute::Attributes;
9use crate::dimension::Dimensions;
10use crate::error::{NetCdfError, Result};
11use oxigdal_core::error::OxiGdalError;
12
13/// Data types supported by NetCDF.
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
15pub enum DataType {
16    /// 8-bit signed integer
17    I8,
18    /// 8-bit unsigned integer
19    U8,
20    /// 16-bit signed integer
21    I16,
22    /// 16-bit unsigned integer (NetCDF-4 only)
23    U16,
24    /// 32-bit signed integer
25    I32,
26    /// 32-bit unsigned integer (NetCDF-4 only)
27    U32,
28    /// 64-bit signed integer (NetCDF-4 only)
29    I64,
30    /// 64-bit unsigned integer (NetCDF-4 only)
31    U64,
32    /// 32-bit floating point
33    F32,
34    /// 64-bit floating point
35    F64,
36    /// Text/character
37    Char,
38    /// String (NetCDF-4 only)
39    String,
40}
41
42impl DataType {
43    /// Get the size in bytes.
44    #[must_use]
45    pub const fn size(&self) -> usize {
46        match self {
47            Self::I8 | Self::U8 | Self::Char => 1,
48            Self::I16 | Self::U16 => 2,
49            Self::I32 | Self::U32 | Self::F32 => 4,
50            Self::I64 | Self::U64 | Self::F64 => 8,
51            Self::String => 0, // Variable size
52        }
53    }
54
55    /// Get the type name.
56    #[must_use]
57    pub const fn name(&self) -> &'static str {
58        match self {
59            Self::I8 => "i8",
60            Self::U8 => "u8",
61            Self::I16 => "i16",
62            Self::U16 => "u16",
63            Self::I32 => "i32",
64            Self::U32 => "u32",
65            Self::I64 => "i64",
66            Self::U64 => "u64",
67            Self::F32 => "f32",
68            Self::F64 => "f64",
69            Self::Char => "char",
70            Self::String => "string",
71        }
72    }
73
74    /// Check if this is a floating point type.
75    #[must_use]
76    pub const fn is_float(&self) -> bool {
77        matches!(self, Self::F32 | Self::F64)
78    }
79
80    /// Check if this is an integer type.
81    #[must_use]
82    pub const fn is_integer(&self) -> bool {
83        matches!(
84            self,
85            Self::I8
86                | Self::U8
87                | Self::I16
88                | Self::U16
89                | Self::I32
90                | Self::U32
91                | Self::I64
92                | Self::U64
93        )
94    }
95
96    /// Check if this is a signed type.
97    #[must_use]
98    pub const fn is_signed(&self) -> bool {
99        matches!(
100            self,
101            Self::I8 | Self::I16 | Self::I32 | Self::I64 | Self::F32 | Self::F64
102        )
103    }
104
105    /// Check if this is available in NetCDF-3.
106    #[must_use]
107    pub const fn is_netcdf3_compatible(&self) -> bool {
108        matches!(
109            self,
110            Self::I8 | Self::I16 | Self::I32 | Self::F32 | Self::F64 | Self::Char
111        )
112    }
113}
114
115/// A variable in a NetCDF file.
116#[derive(Debug, Clone, Serialize, Deserialize)]
117pub struct Variable {
118    /// Name of the variable
119    name: String,
120    /// Data type
121    data_type: DataType,
122    /// Dimension names (in order)
123    dimension_names: Vec<String>,
124    /// Attributes
125    attributes: Attributes,
126    /// Whether this is a coordinate variable
127    is_coordinate: bool,
128}
129
130impl Variable {
131    /// Create a new variable.
132    ///
133    /// # Arguments
134    ///
135    /// * `name` - Name of the variable
136    /// * `data_type` - Data type
137    /// * `dimension_names` - Names of dimensions (in order)
138    ///
139    /// # Errors
140    ///
141    /// Returns error if the name is empty.
142    pub fn new(
143        name: impl Into<String>,
144        data_type: DataType,
145        dimension_names: Vec<String>,
146    ) -> Result<Self> {
147        let name = name.into();
148        if name.is_empty() {
149            return Err(NetCdfError::Core(
150                OxiGdalError::invalid_parameter_builder("name", "Variable name cannot be empty")
151                    .with_operation("create_netcdf_variable")
152                    .with_parameter("data_type", format!("{:?}", data_type))
153                    .with_parameter("num_dimensions", dimension_names.len().to_string())
154                    .with_suggestion("Provide a non-empty variable name")
155                    .build(),
156            ));
157        }
158        Ok(Self {
159            name,
160            data_type,
161            dimension_names,
162            attributes: Attributes::new(),
163            is_coordinate: false,
164        })
165    }
166
167    /// Create a coordinate variable.
168    ///
169    /// A coordinate variable has the same name as its dimension.
170    ///
171    /// # Arguments
172    ///
173    /// * `name` - Name of the variable and dimension
174    /// * `data_type` - Data type
175    ///
176    /// # Errors
177    ///
178    /// Returns error if the name is empty.
179    pub fn new_coordinate(name: impl Into<String>, data_type: DataType) -> Result<Self> {
180        let name = name.into();
181        if name.is_empty() {
182            return Err(NetCdfError::Core(
183                OxiGdalError::invalid_parameter_builder(
184                    "name",
185                    "Coordinate variable name cannot be empty",
186                )
187                .with_operation("create_coordinate_variable")
188                .with_parameter("data_type", format!("{:?}", data_type))
189                .with_suggestion("Provide a non-empty coordinate variable name")
190                .build(),
191            ));
192        }
193        let dimension_names = vec![name.clone()];
194        Ok(Self {
195            name,
196            data_type,
197            dimension_names,
198            attributes: Attributes::new(),
199            is_coordinate: true,
200        })
201    }
202
203    /// Get the variable name.
204    #[must_use]
205    pub fn name(&self) -> &str {
206        &self.name
207    }
208
209    /// Get the data type.
210    #[must_use]
211    pub const fn data_type(&self) -> DataType {
212        self.data_type
213    }
214
215    /// Get the dimension names.
216    #[must_use]
217    pub fn dimension_names(&self) -> &[String] {
218        &self.dimension_names
219    }
220
221    /// Get the number of dimensions.
222    #[must_use]
223    pub fn ndims(&self) -> usize {
224        self.dimension_names.len()
225    }
226
227    /// Check if this is a scalar variable (no dimensions).
228    #[must_use]
229    pub fn is_scalar(&self) -> bool {
230        self.dimension_names.is_empty()
231    }
232
233    /// Check if this is a coordinate variable.
234    #[must_use]
235    pub const fn is_coordinate(&self) -> bool {
236        self.is_coordinate
237    }
238
239    /// Set whether this is a coordinate variable.
240    pub fn set_coordinate(&mut self, is_coordinate: bool) {
241        self.is_coordinate = is_coordinate;
242    }
243
244    /// Get the attributes.
245    #[must_use]
246    pub const fn attributes(&self) -> &Attributes {
247        &self.attributes
248    }
249
250    /// Get mutable access to attributes.
251    pub fn attributes_mut(&mut self) -> &mut Attributes {
252        &mut self.attributes
253    }
254
255    /// Get the shape based on dimensions.
256    ///
257    /// # Errors
258    ///
259    /// Returns error if any dimension is not found.
260    pub fn shape(&self, dimensions: &Dimensions) -> Result<Vec<usize>> {
261        self.dimension_names
262            .iter()
263            .map(|name| {
264                dimensions
265                    .get(name)
266                    .map(|d| d.len())
267                    .ok_or_else(|| NetCdfError::DimensionNotFound { name: name.clone() })
268            })
269            .collect()
270    }
271
272    /// Get the total size based on dimensions.
273    ///
274    /// # Errors
275    ///
276    /// Returns error if any dimension is not found or if size overflows.
277    pub fn size(&self, dimensions: &Dimensions) -> Result<usize> {
278        if self.is_scalar() {
279            return Ok(1);
280        }
281
282        let shape = self.shape(dimensions)?;
283        shape
284            .iter()
285            .try_fold(1usize, |acc, &size| acc.checked_mul(size))
286            .ok_or_else(|| {
287                NetCdfError::Core(
288                    OxiGdalError::io_error_builder("NetCDF variable size overflow")
289                        .with_operation("calculate_variable_size")
290                        .with_parameter("variable", &self.name)
291                        .with_parameter("ndims", self.dimension_names.len().to_string())
292                        .with_suggestion(
293                            "Variable dimensions result in size overflow. Check dimension sizes",
294                        )
295                        .build(),
296                )
297            })
298    }
299
300    /// Get the size in bytes based on dimensions.
301    ///
302    /// # Errors
303    ///
304    /// Returns error if any dimension is not found or if size overflows.
305    pub fn size_bytes(&self, dimensions: &Dimensions) -> Result<usize> {
306        let element_size = self.data_type.size();
307        if element_size == 0 {
308            return Err(NetCdfError::Core(
309                OxiGdalError::not_supported_builder("Variable-length data type size calculation")
310                    .with_operation("calculate_variable_size_bytes")
311                    .with_parameter("variable", &self.name)
312                    .with_parameter("data_type", format!("{:?}", self.data_type))
313                    .with_suggestion("Variable-length types require special handling")
314                    .build(),
315            ));
316        }
317
318        let num_elements = self.size(dimensions)?;
319        num_elements.checked_mul(element_size).ok_or_else(|| {
320            NetCdfError::Core(
321                OxiGdalError::io_error_builder("NetCDF variable byte size overflow")
322                    .with_operation("calculate_variable_size_bytes")
323                    .with_parameter("variable", &self.name)
324                    .with_parameter("element_size", element_size.to_string())
325                    .with_parameter("num_elements", num_elements.to_string())
326                    .with_suggestion(
327                        "Variable size exceeds maximum. Reduce dimensions or use chunking",
328                    )
329                    .build(),
330            )
331        })
332    }
333
334    /// Check if compatible with NetCDF-3.
335    #[must_use]
336    pub fn is_netcdf3_compatible(&self) -> bool {
337        self.data_type.is_netcdf3_compatible()
338    }
339}
340
341/// Collection of variables.
342#[derive(Debug, Clone, Default, Serialize, Deserialize)]
343pub struct Variables {
344    variables: Vec<Variable>,
345}
346
347impl Variables {
348    /// Create a new empty variable collection.
349    #[must_use]
350    pub const fn new() -> Self {
351        Self {
352            variables: Vec::new(),
353        }
354    }
355
356    /// Create from a vector of variables.
357    #[must_use]
358    pub const fn from_vec(variables: Vec<Variable>) -> Self {
359        Self { variables }
360    }
361
362    /// Add a variable.
363    ///
364    /// # Errors
365    ///
366    /// Returns error if a variable with the same name already exists.
367    pub fn add(&mut self, variable: Variable) -> Result<()> {
368        if self.contains(variable.name()) {
369            return Err(NetCdfError::Core(
370                OxiGdalError::invalid_parameter_builder("variable", "Variable already exists")
371                    .with_operation("add_netcdf_variable")
372                    .with_parameter("variable_name", variable.name())
373                    .with_parameter("data_type", format!("{:?}", variable.data_type()))
374                    .with_suggestion("Use a unique variable name or retrieve existing variable")
375                    .build(),
376            ));
377        }
378        self.variables.push(variable);
379        Ok(())
380    }
381
382    /// Get a variable by name.
383    #[must_use]
384    pub fn get(&self, name: &str) -> Option<&Variable> {
385        self.variables.iter().find(|v| v.name() == name)
386    }
387
388    /// Get a mutable reference to a variable by name.
389    pub fn get_mut(&mut self, name: &str) -> Option<&mut Variable> {
390        self.variables.iter_mut().find(|v| v.name() == name)
391    }
392
393    /// Get a variable by index.
394    #[must_use]
395    pub fn get_by_index(&self, index: usize) -> Option<&Variable> {
396        self.variables.get(index)
397    }
398
399    /// Check if a variable exists.
400    #[must_use]
401    pub fn contains(&self, name: &str) -> bool {
402        self.variables.iter().any(|v| v.name() == name)
403    }
404
405    /// Get the number of variables.
406    #[must_use]
407    pub fn len(&self) -> usize {
408        self.variables.len()
409    }
410
411    /// Check if empty.
412    #[must_use]
413    pub fn is_empty(&self) -> bool {
414        self.variables.is_empty()
415    }
416
417    /// Get an iterator over variables.
418    pub fn iter(&self) -> impl Iterator<Item = &Variable> {
419        self.variables.iter()
420    }
421
422    /// Get names of all variables.
423    #[must_use]
424    pub fn names(&self) -> Vec<&str> {
425        self.variables.iter().map(|v| v.name()).collect()
426    }
427
428    /// Get coordinate variables.
429    pub fn coordinates(&self) -> impl Iterator<Item = &Variable> {
430        self.variables.iter().filter(|v| v.is_coordinate())
431    }
432
433    /// Get data variables (non-coordinate).
434    pub fn data_variables(&self) -> impl Iterator<Item = &Variable> {
435        self.variables.iter().filter(|v| !v.is_coordinate())
436    }
437}
438
439impl IntoIterator for Variables {
440    type Item = Variable;
441    type IntoIter = std::vec::IntoIter<Variable>;
442
443    fn into_iter(self) -> Self::IntoIter {
444        self.variables.into_iter()
445    }
446}
447
448impl<'a> IntoIterator for &'a Variables {
449    type Item = &'a Variable;
450    type IntoIter = std::slice::Iter<'a, Variable>;
451
452    fn into_iter(self) -> Self::IntoIter {
453        self.variables.iter()
454    }
455}
456
457impl FromIterator<Variable> for Variables {
458    fn from_iter<T: IntoIterator<Item = Variable>>(iter: T) -> Self {
459        Self {
460            variables: iter.into_iter().collect(),
461        }
462    }
463}
464
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dimension::Dimension;

    /// Turn string literals into the owned dimension-name list `Variable::new` takes.
    fn dim_names(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| (*name).to_string()).collect()
    }

    #[test]
    fn test_data_type_properties() {
        // Element widths.
        assert_eq!(DataType::F32.size(), 4);
        assert_eq!(DataType::F64.size(), 8);

        // Classification predicates.
        assert!(DataType::F32.is_float());
        assert!(DataType::I32.is_integer());
        assert!(DataType::I32.is_signed());
        assert!(!DataType::U32.is_signed());
    }

    #[test]
    fn test_netcdf3_compatibility() {
        let classic = [
            DataType::I8,
            DataType::I16,
            DataType::I32,
            DataType::F32,
            DataType::F64,
        ];
        for data_type in classic {
            assert!(
                data_type.is_netcdf3_compatible(),
                "{:?} should be NetCDF-3 compatible",
                data_type
            );
        }

        let netcdf4_only = [DataType::U16, DataType::U32, DataType::String];
        for data_type in netcdf4_only {
            assert!(
                !data_type.is_netcdf3_compatible(),
                "{:?} should not be NetCDF-3 compatible",
                data_type
            );
        }
    }

    #[test]
    fn test_variable_creation() {
        let temperature = Variable::new(
            "temperature",
            DataType::F32,
            dim_names(&["time", "lat", "lon"]),
        )
        .expect("Failed to create temperature variable");

        assert_eq!(temperature.name(), "temperature");
        assert_eq!(temperature.data_type(), DataType::F32);
        assert_eq!(temperature.ndims(), 3);
        assert!(!temperature.is_scalar());
        assert!(!temperature.is_coordinate());
    }

    #[test]
    fn test_coordinate_variable() {
        let time = Variable::new_coordinate("time", DataType::F64)
            .expect("Failed to create coordinate variable");

        assert_eq!(time.name(), "time");
        assert!(time.is_coordinate());
        assert_eq!(time.ndims(), 1);
        assert_eq!(
            time.dimension_names().first().map(String::as_str),
            Some("time")
        );
    }

    #[test]
    fn test_scalar_variable() {
        let scalar = Variable::new("global_average", DataType::F32, Vec::new())
            .expect("Failed to create scalar variable");

        assert!(scalar.is_scalar());
        assert_eq!(scalar.ndims(), 0);
    }

    #[test]
    fn test_variable_shape() {
        let mut dims = Dimensions::new();

        let time = Dimension::new("time", 10).expect("Failed to create time dimension");
        dims.add(time).expect("Failed to add time dimension");
        let lat = Dimension::new("lat", 180).expect("Failed to create lat dimension");
        dims.add(lat).expect("Failed to add lat dimension");
        let lon = Dimension::new("lon", 360).expect("Failed to create lon dimension");
        dims.add(lon).expect("Failed to add lon dimension");

        let temperature = Variable::new(
            "temperature",
            DataType::F32,
            dim_names(&["time", "lat", "lon"]),
        )
        .expect("Failed to create temperature variable");

        let shape = temperature
            .shape(&dims)
            .expect("Failed to get variable shape");
        assert_eq!(shape, vec![10, 180, 360]);

        let element_count = temperature
            .size(&dims)
            .expect("Failed to get variable size");
        assert_eq!(element_count, 10 * 180 * 360);

        let byte_count = temperature
            .size_bytes(&dims)
            .expect("Failed to get variable size in bytes");
        assert_eq!(byte_count, 10 * 180 * 360 * 4);
    }

    #[test]
    fn test_variable_collection() {
        let mut vars = Variables::new();

        let time = Variable::new_coordinate("time", DataType::F64)
            .expect("Failed to create time coordinate variable");
        vars.add(time)
            .expect("Failed to add time coordinate variable");

        let temperature = Variable::new("temperature", DataType::F32, dim_names(&["time"]))
            .expect("Failed to create temperature variable");
        vars.add(temperature)
            .expect("Failed to add temperature variable");

        assert_eq!(vars.len(), 2);
        assert!(vars.contains("time"));
        assert!(vars.contains("temperature"));

        let coordinate_names: Vec<&str> = vars.coordinates().map(Variable::name).collect();
        assert_eq!(coordinate_names, vec!["time"]);

        let data_names: Vec<&str> = vars.data_variables().map(Variable::name).collect();
        assert_eq!(data_names, vec!["temperature"]);
    }

    #[test]
    fn test_empty_variable_name() {
        assert!(Variable::new("", DataType::F32, Vec::new()).is_err());
    }

    #[test]
    fn test_duplicate_variable() {
        let mut vars = Variables::new();

        let original = Variable::new("test", DataType::F32, Vec::new())
            .expect("Failed to create test variable");
        vars.add(original).expect("Failed to add test variable");

        let duplicate = Variable::new("test", DataType::F64, Vec::new())
            .expect("Failed to create duplicate test variable");
        assert!(vars.add(duplicate).is_err());
    }
}