tensogram/types.rs
1// (C) Copyright 2026- ECMWF and individual contributors.
2//
3// This software is licensed under the terms of the Apache Licence Version 2.0
4// which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5// In applying this licence, ECMWF does not waive the privileges and immunities
6// granted to it by virtue of its status as an intergovernmental organisation nor
7// does it submit to any jurisdiction.
8
9use serde::{Deserialize, Serialize};
10use std::collections::BTreeMap;
11
12use crate::dtype::Dtype;
13
14pub use tensogram_encodings::ByteOrder;
15
/// Hash descriptor for payload integrity verification.
///
/// Pairs the kind of hash with the hash value itself. Both parts are stored
/// as plain strings; this type performs no validation of either one.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HashDescriptor {
    /// Kind of hash used. Serialized under the key `type`; the Rust field
    /// carries a different name because `type` is a reserved keyword.
    #[serde(rename = "type")]
    pub hash_type: String,
    /// The hash value, as text.
    // NOTE(review): presumably a hex-encoded digest — confirm against the encoder.
    pub value: String,
}
23
/// Per-object descriptor — merges tensor metadata and encoding instructions.
///
/// Each data object frame carries one of these as its CBOR descriptor.
/// This replaces the v1 split between `ObjectDescriptor` (tensor info)
/// and `PayloadDescriptor` (encoding info).
///
/// When serialized, the named fields and every entry of `params` end up in
/// one single map, because `params` is flattened into its parent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataObjectDescriptor {
    // ── Tensor metadata ──
    /// Object type tag. Serialized under the key `type` (`type` is a Rust
    /// keyword, hence the different field name).
    #[serde(rename = "type")]
    pub obj_type: String,
    /// Number of dimensions.
    // NOTE(review): presumably equal to `shape.len()`; nothing in this type enforces it.
    pub ndim: u64,
    /// Extent of each dimension.
    pub shape: Vec<u64>,
    /// Stride of each dimension.
    // NOTE(review): the unit (elements vs. bytes) is not visible here — confirm.
    pub strides: Vec<u64>,
    /// Element data type.
    pub dtype: Dtype,

    // ── Encoding pipeline ──
    /// Byte order of the payload values.
    pub byte_order: ByteOrder,
    /// Name of the encoding step.
    pub encoding: String,
    /// Name of the filter step.
    pub filter: String,
    /// Name of the compression step.
    // NOTE(review): the set of accepted names for `encoding`, `filter` and
    // `compression` is defined elsewhere — not validated by this type.
    pub compression: String,

    /// Encoding/filter/compression parameters (reference_value, bits_per_value,
    /// szip_block_offsets, etc.). Stored as ciborium::Value for flexibility.
    ///
    /// Flattened: entries are written as siblings of the named fields, and on
    /// decode any key that does not match a named field is collected here.
    // NOTE(review): a `params` key that equals a named field's serialized name
    // (e.g. "type" or "hash") would be emitted twice on serialization —
    // presumably callers never insert such keys; confirm.
    #[serde(flatten)]
    pub params: BTreeMap<String, ciborium::Value>,

    /// Per-object integrity hash (set during encoding).
    /// Left out of the serialized output entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hash: Option<HashDescriptor>,
}
54
/// Global message metadata (carried in header/footer metadata frames).
///
/// The metadata frame CBOR has three named sections plus `version`:
/// - `base`: per-object metadata array — one entry per data object, each
///   entry holds ALL structured metadata for that object independently.
///   The encoder auto-populates `_reserved_.tensor` (ndim/shape/strides/dtype)
///   in each entry.
/// - `_reserved_`: library internals (provenance: encoder info, time, uuid).
///   Client code can read but MUST NOT write — the encoder validates this.
/// - `_extra_`: client-writable catch-all for ad-hoc message-level annotations.
///
/// Unknown CBOR keys at the top level are silently ignored on decode.
///
/// Each of the three sections is omitted from the serialized output when it
/// is empty, and is filled with an empty container when absent on decode.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobalMetadata {
    /// Format version number. Unlike the three sections below, this field has
    /// no serde default, so it must be present in the input when decoding.
    pub version: u16,

    /// Per-object metadata array. Each entry holds ALL structured metadata
    /// for that data object. Entries are independent — no tracking of what
    /// is common across objects.
    ///
    /// The encoder auto-populates `_reserved_.tensor` (with ndim, shape,
    /// strides, dtype) in each entry. Application code may pre-populate
    /// additional keys (e.g. `"mars": {…}`) before encoding; the encoder
    /// preserves them.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub base: Vec<BTreeMap<String, ciborium::Value>>,

    /// Library internals — provenance info (encoder, time, uuid).
    /// Client code can read but MUST NOT write; the encoder overwrites this.
    ///
    /// Serialized under the key `_reserved_`.
    #[serde(
        rename = "_reserved_",
        default,
        skip_serializing_if = "BTreeMap::is_empty"
    )]
    pub reserved: BTreeMap<String, ciborium::Value>,

    /// Client-writable catch-all for ad-hoc message-level annotations.
    ///
    /// Serialized under the key `_extra_`.
    #[serde(
        rename = "_extra_",
        default,
        skip_serializing_if = "BTreeMap::is_empty"
    )]
    pub extra: BTreeMap<String, ciborium::Value>,
}
99
/// Index frame payload — maps object ordinals to byte offsets.
///
/// `offsets` and `lengths` are indexed by object ordinal. This type does not
/// enforce any relationship between `object_count` and the two vectors;
/// keeping them in step is the responsibility of whoever builds the frame.
///
/// Frames compare by value (`PartialEq`/`Eq`), so two frames are equal exactly
/// when their counts, offsets and lengths are all equal.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct IndexFrame {
    /// Number of data objects described by this frame.
    pub object_count: u64,
    /// Byte offset of each data object frame from message start.
    pub offsets: Vec<u64>,
    /// Total byte length of each data object frame, excluding alignment padding.
    pub lengths: Vec<u64>,
}
109
/// Hash frame payload — per-object integrity hashes.
///
/// A single `hash_type` applies to every entry of `hashes`. This type does
/// not enforce any relationship between `object_count` and `hashes.len()`.
///
/// Frames compare by value (`PartialEq`/`Eq`), which makes it possible to
/// check a decoded frame against an expected one directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashFrame {
    /// Number of data objects covered by this frame.
    pub object_count: u64,
    /// Kind of hash shared by all entries in `hashes`.
    pub hash_type: String,
    /// One hash value per data object, as text.
    // NOTE(review): presumably ordered by object ordinal — confirm against the encoder.
    pub hashes: Vec<String>,
}
117
118impl Default for GlobalMetadata {
119 fn default() -> Self {
120 Self {
121 version: 2,
122 base: Vec::new(),
123 reserved: BTreeMap::new(),
124 extra: BTreeMap::new(),
125 }
126 }
127}
128
/// A decoded object: its descriptor paired with its raw decoded payload bytes.
///
/// Tuple layout: element `0` is the [`DataObjectDescriptor`], element `1` is
/// the payload as an owned byte vector.
pub type DecodedObject = (DataObjectDescriptor, Vec<u8>);