Skip to main content

amaru_kernel/cardano/
metadatum.rs

// Copyright 2026 PRAGMA
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15use std::collections::BTreeMap;
16
17use crate::{Int, cbor};
18
19/// A piece of (structured) metadata found in transaction.
20#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, serde::Serialize, serde::Deserialize)]
21pub enum Metadatum {
22    // NOTE: CBOR (signed) integers
23    //
24    // We use CBOR's Int here and not a Rust's i64 because CBOR's signed integers are encoded next
25    // to their signs, meaning that they range from -2^64 to 2^64 - 1; unlike Rust's i64 which
26    // ranges from -2^63 .. 2^63 - 1.
27    //
28    // Simply using an i128 isn't satisfactory because it now allows the representation of invalid
29    // states on the Rust's side (we may end up with integers that are far beyond what's
30    // acceptable).
31    //
32    // "Funny-enough", the Haskell code uses arbitrary-length integers here; although only allow
33    // decoding in the [-2^64; 2^64 - 1] range. Encoding is fine with arbitrary large integers;
34    // thus violating roundtripping invariants.
35    Int(Int),
36    Bytes(Vec<u8>),
37    Text(String),
38    Array(Vec<Metadatum>),
39    Map(BTreeMap<Metadatum, Metadatum>),
40}
41
42/// FIXME: Multi-era + length checks on bytes and text
43///
44/// Ensure that this decoder is multi-era capable and also correctly checks for bytes and
45/// (utf-8-encoded) text to be encoded as chunks.
46impl<'b, C> cbor::Decode<'b, C> for Metadatum {
47    fn decode(d: &mut cbor::Decoder<'b>, ctx: &mut C) -> Result<Self, cbor::decode::Error> {
48        use cbor::data::Type::*;
49
50        #[allow(clippy::wildcard_enum_match_arm)]
51        match d.datatype()? {
52            U8 | U16 | U32 | U64 | I8 | I16 | I32 | I64 | Int => {
53                let i = d.decode()?;
54                Ok(Metadatum::Int(i))
55            }
56            Bytes => Ok(Metadatum::Bytes(Vec::from(d.decode_with::<C, cbor::bytes::ByteVec>(ctx)?))),
57            String => Ok(Metadatum::Text(d.decode_with(ctx)?)),
58            Array | ArrayIndef => Ok(Metadatum::Array(d.decode_with(ctx)?)),
59            Map | MapIndef => Ok(Metadatum::Map(d.decode_with(ctx)?)),
60            any => {
61                Err(cbor::decode::Error::message(format!("unexpected CBOR datatype {any:?} when decoding metadatum")))
62            }
63        }
64    }
65}
66
67impl<C> cbor::Encode<C> for Metadatum {
68    fn encode<W: cbor::encode::Write>(
69        &self,
70        e: &mut cbor::Encoder<W>,
71        ctx: &mut C,
72    ) -> Result<(), cbor::encode::Error<W::Error>> {
73        match self {
74            Metadatum::Int(a) => {
75                e.encode_with(a, ctx)?;
76            }
77            // FIXME: Use stream encoding for length > 64
78            Metadatum::Bytes(a) => {
79                e.encode_with(<&cbor::bytes::ByteSlice>::from(a.as_slice()), ctx)?;
80            }
81            // FIXME: Use stream encoding for length > 64
82            Metadatum::Text(a) => {
83                e.encode_with(a, ctx)?;
84            }
85            Metadatum::Array(a) => {
86                e.encode_with(a, ctx)?;
87            }
88            Metadatum::Map(a) => {
89                e.encode_with(a, ctx)?;
90            }
91        };
92
93        Ok(())
94    }
95}
96
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use test_case::test_case;

    use super::Metadatum;
    use crate::{Int, from_cbor_no_leftovers};

    /// Builds an integer metadatum; panics when `n` cannot be converted into an `Int`.
    fn int(n: i128) -> Metadatum {
        let value = Int::try_from(n).unwrap();
        Metadatum::Int(value)
    }

    /// Builds a bytes metadatum holding a copy of `b`.
    fn bytes(b: &[u8]) -> Metadatum {
        Metadatum::Bytes(Vec::from(b))
    }

    /// Builds a text metadatum holding a copy of `s`.
    fn text(s: &str) -> Metadatum {
        Metadatum::Text(String::from(s))
    }

    /// Builds an array metadatum from clones of `xs`, in the same order.
    fn list(xs: &[Metadatum]) -> Metadatum {
        Metadatum::Array(Vec::from(xs))
    }

    /// Builds a map metadatum from clones of the given key/value pairs.
    fn map(kvs: &[(Metadatum, Metadatum)]) -> Metadatum {
        Metadatum::Map(kvs.iter().cloned().collect::<BTreeMap<_, _>>())
    }

    #[test_case("00", int(0))]
    #[test_case("01", int(1))]
    #[test_case("21", int(-2))]
    #[test_case("0e", int(14))]
    #[test_case("37", int(-24))]
    #[test_case("1819", int(25))]
    #[test_case("387f", int(-128))]
    #[test_case("191bfe", int(7166))]
    #[test_case("39df7a", int(-57211))]
    #[test_case("1A000186A0", int(100000))]
    #[test_case("1B1000000000000019", int(1152921504606847001))]
    #[test_case("3B0000000100000000", int(-4294967297))]
    #[test_case("1B8000000000000000", int(9223372036854775808))]
    #[test_case("1B8000000000000001", int(9223372036854775809))]
    #[test_case("3B7FFFFFFFFFFFFFFF", int(-9223372036854775808))]
    #[test_case("3B8000000000000000", int(-9223372036854775809))]
    #[test_case("1BFFFFFFFFFFFFFFFF", int(18446744073709551615))]
    #[test_case("3BFFFFFFFFFFFFFFFF", int(-18446744073709551616))]
    #[test_case("40", bytes(b""))]
    #[test_case("43666F6F", bytes(b"foo"))]
    #[test_case(
        "5820667A841296E8057AB7792BFB8FD16F8A39B0B648F1E6F0FA586C7785033EC00C",
        bytes(hex::decode("667a841296e8057ab7792bfb8fd16f8a39b0b648f1e6f0fa586c7785033ec00c").unwrap().as_slice());
        "bytes - some hash"
    )]
    // NOTE: on invalid Metadatum
    //
    // Interestingly, the ledger doesn't allow text and bytes chunks over 64 bytes, yet it still
    // deserializes them: metadatum validation happens through a ledger rule rather than as a
    // decoder failure. While the two should be equivalent, we match the Haskell behaviour here
    // and allow decoding text and bytes longer than 64 bytes. Note that this is "generally"
    // safe provided that the size of the transaction / block is somewhat checked beforehand.
    #[test_case(
        "58E74C6F72656D20697073756D20646F6C6F722073697420616D65742C20636F6E73656374657475722061646970697363696E6720656C69742C2073656420646F20656975736D6F642074656D706F7220696E6369646964756E74207574206C61626F726520657420646F6C6F7265206D61676E6120616C697175612E20557420656E696D206164206D696E696D2076656E69616D2C2071756973206E6F737472756420657865726369746174696F6E20756C6C616D636F206C61626F726973206E69736920757420616C697175697020657820656120636F6D6D6F646F20636F6E7365717561742E",
        bytes(b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.");
        "bytes - over 64"
    )]
    #[test_case(
        "5840F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9",
        bytes("💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩".as_bytes());
        "bytes - exactly 64"
    )]
    #[test_case("60", text(""))]
    #[test_case("63666F6F", text("foo"))]
    // NOTE: on invalid Metadatum
    //
    // Same remark as for the over-64 bytes case: decoding is deliberately permissive.
    #[test_case(
        "78E74C6F72656D20697073756D20646F6C6F722073697420616D65742C20636F6E73656374657475722061646970697363696E6720656C69742C2073656420646F20656975736D6F642074656D706F7220696E6369646964756E74207574206C61626F726520657420646F6C6F7265206D61676E6120616C697175612E20557420656E696D206164206D696E696D2076656E69616D2C2071756973206E6F737472756420657865726369746174696F6E20756C6C616D636F206C61626F726973206E69736920757420616C697175697020657820656120636F6D6D6F646F20636F6E7365717561742E",
        text("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.");
        "text - over 64"
    )]
    #[test_case(
        "7840F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9F09F92A9",
        text("💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩");
        "text - exactly 64"
    )]
    #[test_case("80", list(&[]))]
    #[test_case("9FFF", list(&[]))]
    #[test_case("8101", list(&[int(1)]))]
    #[test_case("9F020304FF", list(&[int(2), int(3), int(4)]))]
    #[test_case("83020304", list(&[int(2), int(3), int(4)]))]
    #[test_case("9F189F801880FF", list(&[int(159), list(&[]), int(128)]))]
    #[test_case("83189F801880", list(&[int(159), list(&[]), int(128)]))]
    #[test_case("A0", map(&[]))]
    #[test_case("BFFF", map(&[]))]
    #[test_case("A1416101", map(&[(bytes(b"a"), int(1))]))]
    #[test_case("BF416101FF", map(&[(bytes(b"a"), int(1))]))]
    #[test_case("A2416102036162", map(&[(bytes(b"a"), int(2)), (int(3), text("b"))]))]
    #[test_case("BF416102036162FF", map(&[(bytes(b"a"), int(2)), (int(3), text("b"))]))]
    #[test_case(
        "A2019FFF1880BFFF",
        map(&[(int(1), list(&[])), (int(128), map(&[]))])
    )]
    #[test_case(
        "BF019FFF1880BFFFFF",
        map(&[(int(1), list(&[])), (int(128), map(&[]))])
    )]
    /// Decodes the hex-encoded `fixture` and checks that it yields exactly `expected`.
    fn decode_wellformed(fixture: &str, expected: Metadatum) {
        let raw = hex::decode(fixture).unwrap();
        let decoded = from_cbor_no_leftovers::<Metadatum>(raw.as_slice()).unwrap_or_else(|err| panic!("{err}"));
        assert_eq!(decoded, expected);
    }

    #[test_case("C249010000000000000000", "decode error: unexpected CBOR datatype Tag when decoding metadatum")]
    #[test_case("1901", "end of input bytes")]
    #[test_case("6261", "end of input bytes")]
    #[test_case("4261", "end of input bytes")]
    #[test_case("784C6F72656D20697073756D20646F6C6F722073697420616D6574", "end of input bytes")]
    #[test_case("C349010000000000000000", "decode error: unexpected CBOR datatype Tag when decoding metadatum")]
    #[test_case("830102", "end of input bytes")]
    #[test_case("9F0102", "end of input bytes")]
    #[test_case("82010203", "decode error: leftovers bytes after decoding after position 3")]
    #[test_case("9F0102FF03", "decode error: leftovers bytes after decoding after position 4")]
    #[test_case("A20102", "end of input bytes")]
    #[test_case("BF0102", "end of input bytes")]
    #[test_case("BF01FF", "decode error: unexpected CBOR datatype Break when decoding metadatum")]
    #[test_case("A101020304", "decode error: leftovers bytes after decoding after position 3")]
    #[test_case("BF0102FF0304", "decode error: leftovers bytes after decoding after position 4")]
    /// Decodes the hex-encoded `fixture` and checks that it fails with the `expected` message.
    fn decode_malformed(fixture: &str, expected: &str) {
        let raw = hex::decode(fixture).unwrap();
        let failure = match from_cbor_no_leftovers::<Metadatum>(raw.as_slice()) {
            Ok(result) => panic!("{result:#?}"),
            Err(err) => err,
        };
        assert_eq!(failure.to_string(), expected);
    }
}