Skip to main content

atlas/
schema.rs

1use array_format::{DType, FillValue};
2use serde::{Deserialize, Serialize};
3
4use crate::config::Codec;
5
6/// A per-dataset attribute value stored in `atlas.json`.
7///
8/// Atlas supports five attribute types — booleans, 64-bit signed integers,
9/// 64-bit floats, UTF-8 strings, and nanosecond-precision timestamps. The
10/// JSON form is untagged: each variant serializes as its natural JSON value
11/// (`true`, `42`, `1.5`, `"hello"`, or an RFC 3339 string for the timestamp).
12#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
13#[serde(untagged)]
14pub enum Attr {
15    /// Boolean attribute. Listed first because `#[serde(untagged)]` tries
16    /// variants in order and `bool` only matches JSON `true`/`false`.
17    Bool(bool),
18    /// Nanosecond-precision UTC timestamp. Stored as an RFC 3339 string;
19    /// the deserializer parses strictly, so non-timestamp strings fall
20    /// through to the `String` variant.
21    #[serde(with = "timestamp_ns_serde")]
22    TimestampNanoseconds(i64),
23    /// UTF-8 string attribute.
24    String(String),
25    /// 64-bit signed integer attribute (JSON numbers without a decimal point).
26    Int64(i64),
27    /// 64-bit float attribute (JSON numbers with a decimal point or exponent).
28    Float64(f64),
29}
30
31mod timestamp_ns_serde {
32    use chrono::{DateTime, SecondsFormat, Utc};
33    use serde::{Deserialize, Deserializer, Serializer};
34
35    pub fn serialize<S: Serializer>(nanos: &i64, s: S) -> Result<S::Ok, S::Error> {
36        let dt = DateTime::<Utc>::from_timestamp_nanos(*nanos);
37        // AutoSi: shortest faithful repr (drops trailing-zero subsecond digits).
38        s.serialize_str(&dt.to_rfc3339_opts(SecondsFormat::AutoSi, true))
39    }
40
41    pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<i64, D::Error> {
42        let s = <&str>::deserialize(d)?;
43        let dt = DateTime::parse_from_rfc3339(s)
44            .map_err(serde::de::Error::custom)?
45            .with_timezone(&Utc);
46        dt.timestamp_nanos_opt().ok_or_else(|| {
47            serde::de::Error::custom("timestamp out of nanosecond range (1677-09-21 .. 2262-04-11)")
48        })
49    }
50}
51
52/// Schema for a single named array within a dataset.
53#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
54pub struct ArraySchema {
55    /// Element type of this array.
56    #[serde(with = "dtype_serde")]
57    pub dtype: DType,
58    /// Logical shape, one entry per axis.
59    pub shape: Vec<usize>,
60    /// On-disk chunk shape, same rank as `shape`. Equal to `shape` for
61    /// single-chunk arrays.
62    pub chunk_shape: Vec<usize>,
63    /// Named dimensions, one per axis. Order matches `shape`.
64    pub dimension_names: Vec<String>,
65    /// Codec used when this array was first created; controls how new blocks are written.
66    pub codec: Codec,
67}
68
69/// Serde helpers for [`DType`] (which uses rkyv, not serde).
70pub(crate) mod dtype_serde {
71    use array_format::DType;
72    use serde::{Deserialize, Deserializer, Serialize, Serializer};
73
74    pub fn serialize<S: Serializer>(dtype: &DType, s: S) -> Result<S::Ok, S::Error> {
75        DTypeRepr::from(dtype.clone()).serialize(s)
76    }
77
78    pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<DType, D::Error> {
79        DTypeRepr::deserialize(d).map(DType::from)
80    }
81
82    #[derive(Serialize, Deserialize)]
83    #[serde(tag = "type", content = "args", rename_all = "snake_case")]
84    enum DTypeRepr {
85        Bool,
86        Int8,
87        Int16,
88        Int32,
89        Int64,
90        UInt8,
91        UInt16,
92        UInt32,
93        UInt64,
94        Float32,
95        Float64,
96        String,
97        Binary,
98        #[serde(rename = "timestamp_nanoseconds")]
99        TimestampNs,
100        FixedSizeList {
101            child: Box<DTypeRepr>,
102            size: u32,
103        },
104        List {
105            child: Box<DTypeRepr>,
106        },
107    }
108
109    impl From<DType> for DTypeRepr {
110        fn from(d: DType) -> Self {
111            match d {
112                DType::Bool => Self::Bool,
113                DType::Int8 => Self::Int8,
114                DType::Int16 => Self::Int16,
115                DType::Int32 => Self::Int32,
116                DType::Int64 => Self::Int64,
117                DType::UInt8 => Self::UInt8,
118                DType::UInt16 => Self::UInt16,
119                DType::UInt32 => Self::UInt32,
120                DType::UInt64 => Self::UInt64,
121                DType::Float32 => Self::Float32,
122                DType::Float64 => Self::Float64,
123                DType::String => Self::String,
124                DType::Binary => Self::Binary,
125                DType::TimestampNs => Self::TimestampNs,
126                DType::FixedSizeList { child, size } => Self::FixedSizeList {
127                    child: Box::new((*child).into()),
128                    size,
129                },
130                DType::List { child } => Self::List {
131                    child: Box::new((*child).into()),
132                },
133            }
134        }
135    }
136
137    impl From<DTypeRepr> for DType {
138        fn from(d: DTypeRepr) -> Self {
139            match d {
140                DTypeRepr::Bool => Self::Bool,
141                DTypeRepr::Int8 => Self::Int8,
142                DTypeRepr::Int16 => Self::Int16,
143                DTypeRepr::Int32 => Self::Int32,
144                DTypeRepr::Int64 => Self::Int64,
145                DTypeRepr::UInt8 => Self::UInt8,
146                DTypeRepr::UInt16 => Self::UInt16,
147                DTypeRepr::UInt32 => Self::UInt32,
148                DTypeRepr::UInt64 => Self::UInt64,
149                DTypeRepr::Float32 => Self::Float32,
150                DTypeRepr::Float64 => Self::Float64,
151                DTypeRepr::String => Self::String,
152                DTypeRepr::Binary => Self::Binary,
153                DTypeRepr::TimestampNs => Self::TimestampNs,
154                DTypeRepr::FixedSizeList { child, size } => Self::FixedSizeList {
155                    child: Box::new((*child).into()),
156                    size,
157                },
158                DTypeRepr::List { child } => Self::List {
159                    child: Box::new((*child).into()),
160                },
161            }
162        }
163    }
164}