Skip to main content

array_format/
dtype.rs

1//! Data type definitions for array elements.
2//!
3//! The format supports fixed-width primitives, variable-length values
4//! (`String`, `Binary`), and list types (`FixedSizeList`, `List`).
5
6use rkyv::{Archive, Deserialize, Serialize};
7
/// Describes the element type of an array.
///
/// The variants fall into three groups:
///
/// * fixed-width primitives (`Bool`, the signed/unsigned integers, the floats
///   and `TimestampNs`),
/// * variable-length values (`String`, `Binary`),
/// * list types (`FixedSizeList`, `List`), which nest another `DType` as the
///   type of their child values.
///
/// The enum derives rkyv's `Archive`, `Serialize` and `Deserialize`. Because the
/// list variants hold a `Box<DType>`, the type is recursive: those fields are
/// marked `#[rkyv(omit_bounds)]` and the trait bounds are written out by hand in
/// the container attributes below.
///
/// NOTE(review): the archived representation presumably depends on the order in
/// which the variants are declared — confirm against rkyv before reordering or
/// inserting variants anywhere but at the end.
#[derive(Debug, Clone, PartialEq, Archive, Serialize, Deserialize)]
// Explicit serializer bounds; presumably these replace the bounds the derive
// would otherwise generate for the recursive `child` fields (which carry
// `omit_bounds`) — verify against the rkyv derive documentation.
#[rkyv(serialize_bounds(
    __S: rkyv::ser::Writer + rkyv::ser::Allocator,
    __S::Error: rkyv::rancor::Source,
))]
// Explicit deserializer bound on the error type.
#[rkyv(deserialize_bounds(__D::Error: rkyv::rancor::Source))]
// Explicit bounds for the bytecheck validation context.
#[rkyv(bytecheck(bounds(
    __C: rkyv::validation::ArchiveContext,
    <__C as rkyv::rancor::Fallible>::Error: rkyv::rancor::Source,
)))]
pub enum DType {
    /// Boolean stored as 1 byte (`0` or `1`).
    Bool,
    /// Signed 8-bit integer.
    Int8,
    /// Signed 16-bit integer (little-endian).
    Int16,
    /// Signed 32-bit integer (little-endian).
    Int32,
    /// Signed 64-bit integer (little-endian).
    Int64,
    /// Unsigned 8-bit integer.
    UInt8,
    /// Unsigned 16-bit integer (little-endian).
    UInt16,
    /// Unsigned 32-bit integer (little-endian).
    UInt32,
    /// Unsigned 64-bit integer (little-endian).
    UInt64,
    /// 32-bit IEEE 754 floating point (little-endian).
    Float32,
    /// 64-bit IEEE 754 floating point (little-endian).
    Float64,
    /// Variable-length UTF-8 string.
    ///
    /// Encoded as an offsets buffer (`N + 1` entries) followed by a
    /// concatenated values buffer.
    String,
    /// Variable-length binary data (vlen bytes).
    ///
    /// Encoded identically to [`DType::String`] but without UTF-8 semantics.
    Binary,
    /// Fixed-size list where every element contains exactly `size` children.
    ///
    /// No offsets buffer is needed; child values are stored contiguously.
    FixedSizeList {
        /// Element type of the child values.
        ///
        /// Boxed because the enum refers to itself here.
        #[rkyv(omit_bounds)]
        child: Box<DType>,
        /// Number of child values per list element.
        size: u32,
    },
    /// Variable-length list where each element can have a different number of children.
    ///
    /// Encoded with a parent offsets buffer (`N + 1` entries) and a concatenated
    /// child values buffer.
    List {
        /// Element type of the child values.
        ///
        /// Boxed because the enum refers to itself here.
        #[rkyv(omit_bounds)]
        child: Box<DType>,
    },
    /// 64-bit nanosecond timestamp since the Unix epoch (little-endian).
    ///
    /// Byte-identical to [`DType::Int64`] but carries the
    /// "nanoseconds since 1970-01-01 UTC" meaning in the type system.
    TimestampNs,
}
76
77impl DType {
78    /// Returns the byte size of a single element for fixed-width types,
79    /// or `None` for variable-length types.
80    pub fn element_size(&self) -> Option<usize> {
81        match self {
82            DType::Bool | DType::Int8 | DType::UInt8 => Some(1),
83            DType::Int16 | DType::UInt16 => Some(2),
84            DType::Int32 | DType::UInt32 | DType::Float32 => Some(4),
85            DType::Int64 | DType::UInt64 | DType::Float64 | DType::TimestampNs => Some(8),
86            DType::FixedSizeList { child, size } => {
87                child.element_size().map(|cs| cs * (*size as usize))
88            }
89            DType::String | DType::Binary | DType::List { .. } => None,
90        }
91    }
92
93    /// Returns `true` if the type requires an offsets buffer
94    /// (i.e. it is variable-length).
95    pub fn is_variable_length(&self) -> bool {
96        self.element_size().is_none()
97    }
98}
99
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `FixedSizeList` holding `size` values of type `child`.
    fn fixed_list(child: DType, size: u32) -> DType {
        DType::FixedSizeList {
            child: Box::new(child),
            size,
        }
    }

    /// Every fixed-width primitive reports its byte width.
    #[test]
    fn fixed_element_sizes() {
        let cases = [
            (DType::Bool, 1usize),
            (DType::Int8, 1),
            (DType::Int16, 2),
            (DType::Int32, 4),
            (DType::Int64, 8),
            (DType::UInt8, 1),
            (DType::UInt16, 2),
            (DType::UInt32, 4),
            (DType::UInt64, 8),
            (DType::Float32, 4),
            (DType::Float64, 8),
            (DType::TimestampNs, 8),
        ];
        for (dtype, width) in &cases {
            assert_eq!(dtype.element_size(), Some(*width), "width of {:?}", dtype);
        }
    }

    /// Strings, binaries and lists are all variable-length.
    #[test]
    fn variable_length_types() {
        let int_list = DType::List {
            child: Box::new(DType::Int32),
        };
        for dtype in &[DType::String, DType::Binary, int_list] {
            assert!(
                dtype.is_variable_length(),
                "{:?} should be variable-length",
                dtype
            );
        }
    }

    /// A fixed-size list of fixed-width children is `size * child width` bytes wide.
    #[test]
    fn fixed_size_list_element_size() {
        let triple = fixed_list(DType::Int16, 3);
        assert_eq!(triple.element_size(), Some(6));
        assert!(!triple.is_variable_length());
    }

    /// Widths multiply through nested fixed-size lists.
    #[test]
    fn nested_fixed_size_list() {
        let nested = fixed_list(fixed_list(DType::UInt8, 4), 2);
        assert_eq!(nested.element_size(), Some(8));
    }

    /// A variable-length child makes the enclosing fixed-size list variable-length.
    #[test]
    fn fixed_size_list_with_vlen_child() {
        let strings = fixed_list(DType::String, 3);
        assert!(strings.is_variable_length());
        assert_eq!(strings.element_size(), None);
    }
}
164}