Skip to main content

array_format/
dtype.rs

1//! Data type definitions for array elements.
2//!
3//! The format supports fixed-width primitives, variable-length values
4//! (`String`, `Binary`), and list types (`FixedSizeList`, `List`).
5
6// `DType` has named-field struct variants; rkyv's `Archive` derive generates an
7// archived type whose fields carry no docs and cannot be annotated, so scope
8// `missing_docs` off for this module. Every public source item here is documented.
9#![allow(missing_docs)]
10
11use rkyv::{Archive, Deserialize, Serialize};
12
/// Describes the element type of an array.
///
/// Variants fall into three groups: fixed-width primitives (including
/// [`DType::TimestampNs`]), variable-length values ([`DType::String`],
/// [`DType::Binary`]) and list types ([`DType::FixedSizeList`],
/// [`DType::List`]). The list variants hold a boxed child `DType`, so the
/// type is recursive and can describe arbitrarily nested lists.
///
/// `DType::element_size` reports the per-element byte width for the
/// fixed-width variants and `None` for everything else.
// NOTE(review): variant order presumably determines the archived
// discriminant produced by the rkyv derive, so new variants should be
// appended rather than inserted — confirm against the rkyv documentation.
#[derive(Debug, Clone, PartialEq, Archive, Serialize, Deserialize)]
// The explicit bounds below are presumably required because `DType` is
// recursive (see `omit_bounds` on the `child` fields): the derive cannot
// infer bounds for a type that contains itself — confirm against the rkyv
// derive documentation before changing them.
#[rkyv(serialize_bounds(
    __S: rkyv::ser::Writer + rkyv::ser::Allocator,
    __S::Error: rkyv::rancor::Source,
))]
#[rkyv(deserialize_bounds(__D::Error: rkyv::rancor::Source))]
#[rkyv(bytecheck(bounds(
    __C: rkyv::validation::ArchiveContext,
    <__C as rkyv::rancor::Fallible>::Error: rkyv::rancor::Source,
)))]
pub enum DType {
    /// Boolean stored as 1 byte (`0` or `1`).
    Bool,
    /// Signed 8-bit integer (1 byte).
    Int8,
    /// Signed 16-bit integer (2 bytes, little-endian).
    Int16,
    /// Signed 32-bit integer (4 bytes, little-endian).
    Int32,
    /// Signed 64-bit integer (8 bytes, little-endian).
    Int64,
    /// Unsigned 8-bit integer (1 byte).
    UInt8,
    /// Unsigned 16-bit integer (2 bytes, little-endian).
    UInt16,
    /// Unsigned 32-bit integer (4 bytes, little-endian).
    UInt32,
    /// Unsigned 64-bit integer (8 bytes, little-endian).
    UInt64,
    /// 32-bit IEEE 754 floating point (4 bytes, little-endian).
    Float32,
    /// 64-bit IEEE 754 floating point (8 bytes, little-endian).
    Float64,
    /// Variable-length UTF-8 string.
    ///
    /// Encoded as an offsets buffer (`N + 1` entries) followed by a
    /// concatenated values buffer. Has no fixed element size.
    String,
    /// Variable-length binary data (arbitrary bytes).
    ///
    /// Encoded identically to [`DType::String`] but without UTF-8 semantics.
    /// Has no fixed element size.
    Binary,
    /// Fixed-size list where every element contains exactly `size` children.
    ///
    /// No offsets buffer is needed; child values are stored contiguously.
    /// The element size is the child's element size multiplied by `size`;
    /// when the child has no fixed element size, neither does the list.
    FixedSizeList {
        /// Element type of the child values.
        // Recursive field: bounds are omitted here and supplied by the
        // container-level `rkyv(...)` attributes instead.
        #[rkyv(omit_bounds)]
        child: Box<DType>,
        /// Number of child values per list element.
        size: u32,
    },
    /// Variable-length list where each element can have a different number of children.
    ///
    /// Encoded with a parent offsets buffer (`N + 1` entries) and a concatenated
    /// child values buffer. Has no fixed element size, whatever the child type.
    List {
        /// Element type of the child values.
        // Recursive field: bounds are omitted here and supplied by the
        // container-level `rkyv(...)` attributes instead.
        #[rkyv(omit_bounds)]
        child: Box<DType>,
    },
    /// 64-bit nanosecond timestamp since the Unix epoch (8 bytes, little-endian).
    ///
    /// Byte-identical to [`DType::Int64`] but carries the
    /// "nanoseconds since 1970-01-01 UTC" meaning in the type system.
    TimestampNs,
}
81
82impl DType {
83    /// Returns the byte size of a single element for fixed-width types,
84    /// or `None` for variable-length types.
85    pub fn element_size(&self) -> Option<usize> {
86        match self {
87            DType::Bool | DType::Int8 | DType::UInt8 => Some(1),
88            DType::Int16 | DType::UInt16 => Some(2),
89            DType::Int32 | DType::UInt32 | DType::Float32 => Some(4),
90            DType::Int64 | DType::UInt64 | DType::Float64 | DType::TimestampNs => Some(8),
91            DType::FixedSizeList { child, size } => {
92                child.element_size().map(|cs| cs * (*size as usize))
93            }
94            DType::String | DType::Binary | DType::List { .. } => None,
95        }
96    }
97
98    /// Returns `true` if the type requires an offsets buffer
99    /// (i.e. it is variable-length).
100    pub fn is_variable_length(&self) -> bool {
101        self.element_size().is_none()
102    }
103}
104
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `FixedSizeList` holding `size` children of type `child`.
    fn fixed_list(child: DType, size: u32) -> DType {
        DType::FixedSizeList {
            child: Box::new(child),
            size,
        }
    }

    /// Every fixed-width primitive reports its byte width.
    #[test]
    fn fixed_element_sizes() {
        let expected = vec![
            (DType::Bool, 1),
            (DType::Int8, 1),
            (DType::Int16, 2),
            (DType::Int32, 4),
            (DType::Int64, 8),
            (DType::UInt8, 1),
            (DType::UInt16, 2),
            (DType::UInt32, 4),
            (DType::UInt64, 8),
            (DType::Float32, 4),
            (DType::Float64, 8),
            (DType::TimestampNs, 8),
        ];
        for (dtype, width) in expected {
            assert_eq!(dtype.element_size(), Some(width), "{:?}", dtype);
        }
    }

    /// Strings, binaries and variable-length lists need an offsets buffer.
    #[test]
    fn variable_length_types() {
        let int_list = DType::List {
            child: Box::new(DType::Int32),
        };
        for dtype in vec![DType::String, DType::Binary, int_list] {
            assert!(dtype.is_variable_length(), "{:?}", dtype);
        }
    }

    /// A fixed-size list of fixed-width children is itself fixed-width.
    #[test]
    fn fixed_size_list_element_size() {
        let triple = fixed_list(DType::Int16, 3);
        assert_eq!(triple.element_size(), Some(6));
        assert!(!triple.is_variable_length());
    }

    /// Sizes multiply through nested fixed-size lists.
    #[test]
    fn nested_fixed_size_list() {
        let nested = fixed_list(fixed_list(DType::UInt8, 4), 2);
        assert_eq!(nested.element_size(), Some(8));
    }

    /// A fixed-size list inherits variable-length-ness from its child.
    #[test]
    fn fixed_size_list_with_vlen_child() {
        let strings = fixed_list(DType::String, 3);
        assert!(strings.is_variable_length());
        assert_eq!(strings.element_size(), None);
    }
}
169}