Skip to main content

tensogram/
types.rs

1// (C) Copyright 2026- ECMWF and individual contributors.
2//
3// This software is licensed under the terms of the Apache Licence Version 2.0
4// which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5// In applying this licence, ECMWF does not waive the privileges and immunities
6// granted to it by virtue of its status as an intergovernmental organisation nor
7// does it submit to any jurisdiction.
8
9use serde::{Deserialize, Serialize};
10use std::collections::BTreeMap;
11
12use crate::dtype::Dtype;
13
14pub use tensogram_encodings::ByteOrder;
15
16/// Hash descriptor for payload integrity verification.
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct HashDescriptor {
19    #[serde(rename = "type")]
20    pub hash_type: String,
21    pub value: String,
22}
23
24/// Per-object descriptor — merges tensor metadata and encoding instructions.
25///
26/// Each data object frame carries one of these as its CBOR descriptor.
27/// This replaces the v1 split between `ObjectDescriptor` (tensor info)
28/// and `PayloadDescriptor` (encoding info).
29#[derive(Debug, Clone, Serialize, Deserialize)]
30pub struct DataObjectDescriptor {
31    // ── Tensor metadata ──
32    #[serde(rename = "type")]
33    pub obj_type: String,
34    pub ndim: u64,
35    pub shape: Vec<u64>,
36    pub strides: Vec<u64>,
37    pub dtype: Dtype,
38
39    // ── Encoding pipeline ──
40    pub byte_order: ByteOrder,
41    pub encoding: String,
42    pub filter: String,
43    pub compression: String,
44
45    /// Encoding/filter/compression parameters (reference_value, bits_per_value,
46    /// szip_block_offsets, etc.). Stored as ciborium::Value for flexibility.
47    #[serde(flatten)]
48    pub params: BTreeMap<String, ciborium::Value>,
49
50    /// Per-object integrity hash (set during encoding).
51    #[serde(skip_serializing_if = "Option::is_none")]
52    pub hash: Option<HashDescriptor>,
53}
54
55/// Global message metadata (carried in header/footer metadata frames).
56///
57/// The metadata frame CBOR has three named sections plus `version`:
58/// - `base`: per-object metadata array — one entry per data object, each
59///   entry holds ALL structured metadata for that object independently.
60///   The encoder auto-populates `_reserved_.tensor` (ndim/shape/strides/dtype)
61///   in each entry.
62/// - `_reserved_`: library internals (provenance: encoder info, time, uuid).
63///   Client code can read but MUST NOT write — the encoder validates this.
64/// - `_extra_`: client-writable catch-all for ad-hoc message-level annotations.
65///
66/// Unknown CBOR keys at the top level are silently ignored on decode.
67#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct GlobalMetadata {
69    pub version: u16,
70
71    /// Per-object metadata array.  Each entry holds ALL structured metadata
72    /// for that data object.  Entries are independent — no tracking of what
73    /// is common across objects.
74    ///
75    /// The encoder auto-populates `_reserved_.tensor` (with ndim, shape,
76    /// strides, dtype) in each entry.  Application code may pre-populate
77    /// additional keys (e.g. `"mars": {…}`) before encoding; the encoder
78    /// preserves them.
79    #[serde(default, skip_serializing_if = "Vec::is_empty")]
80    pub base: Vec<BTreeMap<String, ciborium::Value>>,
81
82    /// Library internals — provenance info (encoder, time, uuid).
83    /// Client code can read but MUST NOT write; the encoder overwrites this.
84    #[serde(
85        rename = "_reserved_",
86        default,
87        skip_serializing_if = "BTreeMap::is_empty"
88    )]
89    pub reserved: BTreeMap<String, ciborium::Value>,
90
91    /// Client-writable catch-all for ad-hoc message-level annotations.
92    #[serde(
93        rename = "_extra_",
94        default,
95        skip_serializing_if = "BTreeMap::is_empty"
96    )]
97    pub extra: BTreeMap<String, ciborium::Value>,
98}
99
/// Index frame payload — maps object ordinals to byte offsets.
///
/// `offsets` and `lengths` each hold one entry per data object frame.
/// Nothing in this type ties `object_count` to the length of either vector;
/// keeping them in agreement is up to whoever builds or parses the frame.
///
/// `PartialEq`/`Eq` are derived so that index frames can be compared
/// directly, e.g. when checking an encode/decode round trip.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct IndexFrame {
    /// Number of data objects described by this index.
    pub object_count: u64,
    /// Byte offset of each data object frame from message start.
    pub offsets: Vec<u64>,
    /// Total byte length of each data object frame, excluding alignment padding.
    pub lengths: Vec<u64>,
}
109
/// Hash frame payload — per-object integrity hashes.
///
/// A single `hash_type` applies to every entry of `hashes`. Nothing in this
/// type ties `object_count` to `hashes.len()`; keeping them in agreement is
/// up to whoever builds or parses the frame.
///
/// `Default` is derived for parity with `IndexFrame` (an empty frame: zero
/// objects, empty hash type, no hashes). `PartialEq`/`Eq` are derived so
/// frames can be compared directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct HashFrame {
    /// Number of data objects covered by this frame.
    pub object_count: u64,
    /// Hash type identifier shared by all entries in `hashes`.
    pub hash_type: String,
    /// One hash value per data object, as strings.
    pub hashes: Vec<String>,
}
117
118impl Default for GlobalMetadata {
119    fn default() -> Self {
120        Self {
121            version: 2,
122            base: Vec::new(),
123            reserved: BTreeMap::new(),
124            extra: BTreeMap::new(),
125        }
126    }
127}
128
129/// A decoded object: its descriptor paired with its raw decoded payload bytes.
130pub type DecodedObject = (DataObjectDescriptor, Vec<u8>);