repo_stream/
mst.rs

1//! Low-level types for parsing raw atproto MST CARs
2//!
3//! The primary aim is to work through the **tree** structure. Non-node blocks
4//! are left as raw bytes, for upper levels to parse into DAG-CBOR or whatever.
5
6use ipld_core::cid::Cid;
7use serde::Deserialize;
8
9/// The top-level data object in a repository's tree is a signed commit.
10#[derive(Debug, Deserialize)]
11// #[serde(deny_unknown_fields)]
12pub struct Commit {
13    /// the account DID associated with the repo, in strictly normalized form
14    /// (eg, lowercase as appropriate)
15    pub did: String,
16    /// fixed value of 3 for this repo format version
17    pub version: u64,
18    /// pointer to the top of the repo contents tree structure (MST)
19    pub data: Cid,
20    /// revision of the repo, used as a logical clock.
21    ///
22    /// TID format. Must increase monotonically. Recommend using current
23    /// timestamp as TID; rev values in the "future" (beyond a fudge factor)
24    /// should be ignored and not processed
25    pub rev: String,
26    /// pointer (by hash) to a previous commit object for this repository.
27    ///
28    /// Could be used to create a chain of history, but largely unused (included
29    /// for v2 backwards compatibility). In version 3 repos, this field must
30    /// exist in the CBOR object, but is virtually always null. NOTE: previously
31    /// specified as nullable and optional, but this caused interoperability
32    /// issues.
33    pub prev: Option<Cid>,
34    /// cryptographic signature of this commit, as raw bytes
35    #[serde(with = "serde_bytes")]
36    pub sig: Vec<u8>,
37}
38
39/// MST node data schema
40#[derive(Debug, Deserialize, PartialEq)]
41#[serde(deny_unknown_fields)]
42pub struct Node {
43    /// link to sub-tree Node on a lower level and with all keys sorting before
44    /// keys at this node
45    #[serde(rename = "l")]
46    pub left: Option<Cid>,
47    /// ordered list of TreeEntry objects
48    ///
49    /// atproto MSTs have a fanout of 4, so there can be max 4 entries.
50    #[serde(rename = "e")]
51    pub entries: Vec<Entry>, // maybe we can do [Option<Entry>; 4]?
52}
53
54impl Node {
55    /// test if a block could possibly be a node
56    ///
57    /// we can't eagerly decode records except where we're *sure* they cannot be
58    /// an mst node (and even then we can only attempt) because you can't know
59    /// with certainty what a block is supposed to be without actually walking
60    /// the tree.
61    ///
62    /// so if a block *could be* a node, any record converter must postpone
63    /// processing. if it turns out it happens to be a very node-looking record,
64    /// well, sorry, it just has to only be processed later when that's known.
65    pub fn could_be(bytes: impl AsRef<[u8]>) -> bool {
66        const NODE_FINGERPRINT: [u8; 3] = [
67            0xA2, // map length 2 (for "l" and "e" keys)
68            0x61, // text length 1
69            b'e', // "e" before "l" because map keys have to be lex-sorted
70                  // 0x8?: "e" has array (0x100 upper 3 bits) of some length
71        ];
72        let bytes = bytes.as_ref();
73        bytes.starts_with(&NODE_FINGERPRINT)
74            && bytes
75                .get(3)
76                .map(|b| b & 0b1110_0000 == 0x80)
77                .unwrap_or(false)
78    }
79
80    /// Check if a node has any entries
81    ///
82    /// An empty repository with no records is represented as a single MST node
83    /// with an empty array of entries. This is the only situation in which a
84    /// tree may contain an empty leaf node which does not either contain keys
85    /// ("entries") or point to a sub-tree containing entries.
86    ///
87    /// TODO: to me this is slightly unclear with respect to `l` (ask someone).
88    /// ...is that what "The top of the tree must not be a an empty node which
89    /// only points to a sub-tree." is referring to?
90    pub fn is_empty(&self) -> bool {
91        self.left.is_none() && self.entries.is_empty()
92    }
93}
94
95/// TreeEntry object
96#[derive(Debug, Deserialize, PartialEq)]
97#[serde(deny_unknown_fields)]
98pub struct Entry {
99    /// count of bytes shared with previous TreeEntry in this Node (if any)
100    #[serde(rename = "p")]
101    pub prefix_len: usize,
102    /// remainder of key for this TreeEntry, after "prefixlen" have been removed
103    #[serde(rename = "k", with = "serde_bytes")]
104    pub keysuffix: Vec<u8>, // can we String this here?
105    /// link to the record data (CBOR) for this entry
106    #[serde(rename = "v")]
107    pub value: Cid,
108    /// link to a sub-tree Node at a lower level
109    ///
110    /// the lower level must have keys sorting after this TreeEntry's key (to
111    /// the "right"), but before the next TreeEntry's key in this Node (if any)
112    #[serde(rename = "t")]
113    pub tree: Option<Cid>,
114}