repo_stream/
mst.rs

1//! Low-level types for parsing raw atproto MST CARs
2//!
3//! The primary aim is to work through the **tree** structure. Non-node blocks
4//! are left as raw bytes, for upper levels to parse into DAG-CBOR or whatever.
5
6use ipld_core::cid::Cid;
7use serde::Deserialize;
8
9/// The top-level data object in a repository's tree is a signed commit.
10#[derive(Debug, Deserialize)]
11// #[serde(deny_unknown_fields)]
12pub struct Commit {
13    /// the account DID associated with the repo, in strictly normalized form
14    /// (eg, lowercase as appropriate)
15    pub did: String,
16    /// fixed value of 3 for this repo format version
17    pub version: u64,
18    /// pointer to the top of the repo contents tree structure (MST)
19    pub data: Cid,
20    /// revision of the repo, used as a logical clock.
21    ///
22    /// TID format. Must increase monotonically. Recommend using current
23    /// timestamp as TID; rev values in the "future" (beyond a fudge factor)
24    /// should be ignored and not processed
25    pub rev: String,
26    /// pointer (by hash) to a previous commit object for this repository.
27    ///
28    /// Could be used to create a chain of history, but largely unused (included
29    /// for v2 backwards compatibility). In version 3 repos, this field must
30    /// exist in the CBOR object, but is virtually always null. NOTE: previously
31    /// specified as nullable and optional, but this caused interoperability
32    /// issues.
33    pub prev: Option<Cid>,
34    /// cryptographic signature of this commit, as raw bytes
35    #[serde(with = "serde_bytes")]
36    pub sig: Vec<u8>,
37}
38
39/// MST node data schema
40#[derive(Debug, Deserialize, PartialEq)]
41#[serde(deny_unknown_fields)]
42pub(crate) struct Node {
43    /// link to sub-tree Node on a lower level and with all keys sorting before
44    /// keys at this node
45    #[serde(rename = "l")]
46    pub left: Option<Cid>,
47    /// ordered list of TreeEntry objects
48    ///
49    /// atproto MSTs have a fanout of 4, so there can be max 4 entries.
50    #[serde(rename = "e")]
51    pub entries: Vec<Entry>, // maybe we can do [Option<Entry>; 4]?
52}
53
54impl Node {
55    /// test if a block could possibly be a node
56    ///
57    /// we can't eagerly decode records except where we're *sure* they cannot be
58    /// an mst node (and even then we can only attempt) because you can't know
59    /// with certainty what a block is supposed to be without actually walking
60    /// the tree.
61    ///
62    /// so if a block *could be* a node, any record converter must postpone
63    /// processing. if it turns out it happens to be a very node-looking record,
64    /// well, sorry, it just has to only be processed later when that's known.
65    pub(crate) fn could_be(bytes: impl AsRef<[u8]>) -> bool {
66        const NODE_FINGERPRINT: [u8; 3] = [
67            0xA2, // map length 2 (for "l" and "e" keys)
68            0x61, // text length 1
69            b'e', // "e" before "l" because map keys have to be lex-sorted
70                  // 0x8?: "e" has array (0x100 upper 3 bits) of some length
71        ];
72        let bytes = bytes.as_ref();
73        bytes.starts_with(&NODE_FINGERPRINT)
74            && bytes
75                .get(3)
76                .map(|b| b & 0b1110_0000 == 0x80)
77                .unwrap_or(false)
78    }
79
80    /// Check if a node has any entries
81    ///
82    /// An empty repository with no records is represented as a single MST node
83    /// with an empty array of entries. This is the only situation in which a
84    /// tree may contain an empty leaf node which does not either contain keys
85    /// ("entries") or point to a sub-tree containing entries.
86    pub(crate) fn is_empty(&self) -> bool {
87        self.left.is_none() && self.entries.is_empty()
88    }
89}
90
91/// TreeEntry object
92#[derive(Debug, Deserialize, PartialEq)]
93#[serde(deny_unknown_fields)]
94pub(crate) struct Entry {
95    /// count of bytes shared with previous TreeEntry in this Node (if any)
96    #[serde(rename = "p")]
97    pub prefix_len: usize,
98    /// remainder of key for this TreeEntry, after "prefixlen" have been removed
99    #[serde(rename = "k", with = "serde_bytes")]
100    pub keysuffix: Vec<u8>, // can we String this here?
101    /// link to the record data (CBOR) for this entry
102    #[serde(rename = "v")]
103    pub value: Cid,
104    /// link to a sub-tree Node at a lower level
105    ///
106    /// the lower level must have keys sorting after this TreeEntry's key (to
107    /// the "right"), but before the next TreeEntry's key in this Node (if any)
108    #[serde(rename = "t")]
109    pub tree: Option<Cid>,
110}