1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
//! Data type definitions for array elements.
//!
//! The format supports fixed-width primitives, variable-length values
//! (`String`, `Binary`), and list types (`FixedSizeList`, `List`).
use rkyv::{Archive, Deserialize, Serialize};
/// Describes the element type of an array.
#[derive(Debug, Clone, PartialEq, Archive, Serialize, Deserialize)]
#[rkyv(serialize_bounds(
__S: rkyv::ser::Writer + rkyv::ser::Allocator,
__S::Error: rkyv::rancor::Source,
))]
#[rkyv(deserialize_bounds(__D::Error: rkyv::rancor::Source))]
#[rkyv(bytecheck(bounds(
__C: rkyv::validation::ArchiveContext,
<__C as rkyv::rancor::Fallible>::Error: rkyv::rancor::Source,
)))]
pub enum DType {
/// Boolean stored as 1 byte (`0` or `1`).
Bool,
/// Signed 8-bit integer.
Int8,
/// Signed 16-bit integer (little-endian).
Int16,
/// Signed 32-bit integer (little-endian).
Int32,
/// Signed 64-bit integer (little-endian).
Int64,
/// Unsigned 8-bit integer.
UInt8,
/// Unsigned 16-bit integer (little-endian).
UInt16,
/// Unsigned 32-bit integer (little-endian).
UInt32,
/// Unsigned 64-bit integer (little-endian).
UInt64,
/// 32-bit IEEE 754 floating point (little-endian).
Float32,
/// 64-bit IEEE 754 floating point (little-endian).
Float64,
/// Variable-length UTF-8 string.
///
/// Encoded as an offsets buffer (`N + 1` entries) followed by a
/// concatenated values buffer.
String,
/// Variable-length binary data (vlen bytes).
///
/// Encoded identically to [`DType::String`] but without UTF-8 semantics.
Binary,
/// Fixed-size list where every element contains exactly `size` children.
///
/// No offsets buffer is needed; child values are stored contiguously.
FixedSizeList {
/// Element type of the child values.
#[rkyv(omit_bounds)]
child: Box<DType>,
/// Number of child values per list element.
size: u32,
},
/// Variable-length list where each element can have a different number of children.
///
/// Encoded with a parent offsets buffer (`N + 1` entries) and a concatenated
/// child values buffer.
List {
/// Element type of the child values.
#[rkyv(omit_bounds)]
child: Box<DType>,
},
/// 64-bit nanosecond timestamp since the Unix epoch (little-endian).
///
/// Byte-identical to [`DType::Int64`] but carries the
/// "nanoseconds since 1970-01-01 UTC" meaning in the type system.
TimestampNs,
}
impl DType {
/// Returns the byte size of a single element for fixed-width types,
/// or `None` for variable-length types.
pub fn element_size(&self) -> Option<usize> {
match self {
DType::Bool | DType::Int8 | DType::UInt8 => Some(1),
DType::Int16 | DType::UInt16 => Some(2),
DType::Int32 | DType::UInt32 | DType::Float32 => Some(4),
DType::Int64 | DType::UInt64 | DType::Float64 | DType::TimestampNs => Some(8),
DType::FixedSizeList { child, size } => {
child.element_size().map(|cs| cs * (*size as usize))
}
DType::String | DType::Binary | DType::List { .. } => None,
}
}
/// Returns `true` if the type requires an offsets buffer
/// (i.e. it is variable-length).
pub fn is_variable_length(&self) -> bool {
self.element_size().is_none()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn fixed_element_sizes() {
assert_eq!(DType::Bool.element_size(), Some(1));
assert_eq!(DType::Int8.element_size(), Some(1));
assert_eq!(DType::Int16.element_size(), Some(2));
assert_eq!(DType::Int32.element_size(), Some(4));
assert_eq!(DType::Int64.element_size(), Some(8));
assert_eq!(DType::UInt8.element_size(), Some(1));
assert_eq!(DType::UInt16.element_size(), Some(2));
assert_eq!(DType::UInt32.element_size(), Some(4));
assert_eq!(DType::UInt64.element_size(), Some(8));
assert_eq!(DType::Float32.element_size(), Some(4));
assert_eq!(DType::Float64.element_size(), Some(8));
assert_eq!(DType::TimestampNs.element_size(), Some(8));
}
#[test]
fn variable_length_types() {
assert!(DType::String.is_variable_length());
assert!(DType::Binary.is_variable_length());
assert!(
DType::List {
child: Box::new(DType::Int32)
}
.is_variable_length()
);
}
#[test]
fn fixed_size_list_element_size() {
let dt = DType::FixedSizeList {
child: Box::new(DType::Int16),
size: 3,
};
assert_eq!(dt.element_size(), Some(6));
assert!(!dt.is_variable_length());
}
#[test]
fn nested_fixed_size_list() {
let inner = DType::FixedSizeList {
child: Box::new(DType::UInt8),
size: 4,
};
let outer = DType::FixedSizeList {
child: Box::new(inner),
size: 2,
};
assert_eq!(outer.element_size(), Some(8));
}
#[test]
fn fixed_size_list_with_vlen_child() {
let dt = DType::FixedSizeList {
child: Box::new(DType::String),
size: 3,
};
assert!(dt.is_variable_length());
assert_eq!(dt.element_size(), None);
}
}