repo_stream/mst.rs
1//! Low-level types for parsing raw atproto MST CARs
2//!
3//! The primary aim is to work through the **tree** structure. Non-node blocks
4//! are left as raw bytes, for upper levels to parse into DAG-CBOR or whatever.
5
6use ipld_core::cid::Cid;
7use serde::Deserialize;
8
9/// The top-level data object in a repository's tree is a signed commit.
10#[derive(Debug, Deserialize)]
11// #[serde(deny_unknown_fields)]
12pub struct Commit {
13 /// the account DID associated with the repo, in strictly normalized form
14 /// (eg, lowercase as appropriate)
15 pub did: String,
16 /// fixed value of 3 for this repo format version
17 pub version: u64,
18 /// pointer to the top of the repo contents tree structure (MST)
19 pub data: Cid,
20 /// revision of the repo, used as a logical clock.
21 ///
22 /// TID format. Must increase monotonically. Recommend using current
23 /// timestamp as TID; rev values in the "future" (beyond a fudge factor)
24 /// should be ignored and not processed
25 pub rev: String,
26 /// pointer (by hash) to a previous commit object for this repository.
27 ///
28 /// Could be used to create a chain of history, but largely unused (included
29 /// for v2 backwards compatibility). In version 3 repos, this field must
30 /// exist in the CBOR object, but is virtually always null. NOTE: previously
31 /// specified as nullable and optional, but this caused interoperability
32 /// issues.
33 pub prev: Option<Cid>,
34 /// cryptographic signature of this commit, as raw bytes
35 #[serde(with = "serde_bytes")]
36 pub sig: Vec<u8>,
37}
38
39/// MST node data schema
40#[derive(Debug, Deserialize, PartialEq)]
41#[serde(deny_unknown_fields)]
42pub struct Node {
43 /// link to sub-tree Node on a lower level and with all keys sorting before
44 /// keys at this node
45 #[serde(rename = "l")]
46 pub left: Option<Cid>,
47 /// ordered list of TreeEntry objects
48 ///
49 /// atproto MSTs have a fanout of 4, so there can be max 4 entries.
50 #[serde(rename = "e")]
51 pub entries: Vec<Entry>, // maybe we can do [Option<Entry>; 4]?
52}
53
54impl Node {
55 /// test if a block could possibly be a node
56 ///
57 /// we can't eagerly decode records except where we're *sure* they cannot be
58 /// an mst node (and even then we can only attempt) because you can't know
59 /// with certainty what a block is supposed to be without actually walking
60 /// the tree.
61 ///
62 /// so if a block *could be* a node, any record converter must postpone
63 /// processing. if it turns out it happens to be a very node-looking record,
64 /// well, sorry, it just has to only be processed later when that's known.
65 pub fn could_be(bytes: impl AsRef<[u8]>) -> bool {
66 const NODE_FINGERPRINT: [u8; 3] = [
67 0xA2, // map length 2 (for "l" and "e" keys)
68 0x61, // text length 1
69 b'e', // "e" before "l" because map keys have to be lex-sorted
70 // 0x8?: "e" has array (0x100 upper 3 bits) of some length
71 ];
72 let bytes = bytes.as_ref();
73 bytes.starts_with(&NODE_FINGERPRINT)
74 && bytes
75 .get(3)
76 .map(|b| b & 0b1110_0000 == 0x80)
77 .unwrap_or(false)
78 }
79
80 /// Check if a node has any entries
81 ///
82 /// An empty repository with no records is represented as a single MST node
83 /// with an empty array of entries. This is the only situation in which a
84 /// tree may contain an empty leaf node which does not either contain keys
85 /// ("entries") or point to a sub-tree containing entries.
86 ///
87 /// TODO: to me this is slightly unclear with respect to `l` (ask someone).
88 /// ...is that what "The top of the tree must not be a an empty node which
89 /// only points to a sub-tree." is referring to?
90 pub fn is_empty(&self) -> bool {
91 self.left.is_none() && self.entries.is_empty()
92 }
93}
94
95/// TreeEntry object
96#[derive(Debug, Deserialize, PartialEq)]
97#[serde(deny_unknown_fields)]
98pub struct Entry {
99 /// count of bytes shared with previous TreeEntry in this Node (if any)
100 #[serde(rename = "p")]
101 pub prefix_len: usize,
102 /// remainder of key for this TreeEntry, after "prefixlen" have been removed
103 #[serde(rename = "k", with = "serde_bytes")]
104 pub keysuffix: Vec<u8>, // can we String this here?
105 /// link to the record data (CBOR) for this entry
106 #[serde(rename = "v")]
107 pub value: Cid,
108 /// link to a sub-tree Node at a lower level
109 ///
110 /// the lower level must have keys sorting after this TreeEntry's key (to
111 /// the "right"), but before the next TreeEntry's key in this Node (if any)
112 #[serde(rename = "t")]
113 pub tree: Option<Cid>,
114}