tezos_smart_rollup_encoding/dac/
pages.rs

1// SPDX-FileCopyrightText: 2022-2023 TriliTech <contact@trili.tech>
2// SPDX-FileCopyrightText: 2023 Marigold <contact@marigold.dev>
3//
4// SPDX-License-Identifier: MIT
5
6//! Encoding of DAC payload as a Merkle tree with an arbitrary branching
7//! factor greater or equal to 2. The serialization process works as follows:
8//!
9//! - A large sequence of bytes, the payload, is split into several pages
10//!   of fixed size, each of which is prefixed with a small sequence
11//!   of bytes (also of fixed size), which is referred to as the preamble
12//!   of the page. Pages obtained directly from the original payload
13//!   are referred to as `Contents pages`. Contents pages constitute the
14//!   leaves of the Merkle tree being built,
15//!
16//! - Each contents page (each of which is a sequence of bytes consisting
17//!   of the preamble followed by the actual contents from the original
18//!   payload) is then hashed. The size of each hash is fixed. The hashes are
//!   concatenated together, and the resulting sequence of bytes is split
//!   into pages of the same size, referred to as `Hashes pages`, each of
//!   which is prefixed with a preamble whose size is the same as in Contents pages.
22//!   Hashes pages correspond to nodes of the Merkle tree being built, and
//!   the children of a hash page are the (either Contents or Hashes) pages
//!   whose hashes appear in the former,
25//!
26//! - Hashes pages are hashed using the same process described above, leading
27//!   to a smaller list of hashes pages. To guarantee that the list of hashes
28//!   pages is actually smaller than the original list of pages being hashed,
29//!   we require the size of pages to be large enough to contain at least two
30//!   hashes.
31//!
32//! Merkle tree encodings of DAC pages are versioned, to allow for multiple
33//! hashing schemes to be used.
34
35use host::runtime::{Runtime, RuntimeError};
36
/// Maximum size of dac pages is 4Kb.
///
/// This is the size of a whole serialized page, preamble included
/// (see [`PAGE_PREFIX_SIZE`]).
pub const MAX_PAGE_SIZE: usize = 4096;

/// Tag size to distinguish hash/contents pages.
///
/// The tag is the first byte of a serialized page: `0` for contents
/// pages, `1` for hashes pages.
pub(crate) const PAGE_TAG_SIZE: usize = 1;

/// Prefix of 4-bytes to define how large contents/hash page is.
///
/// The size is encoded as a big-endian `u32`.
pub(crate) const PAGE_SIZE_PREFIX_SIZE: usize = 4;

/// Prefix of 5-bytes for page variant plus size.
pub(crate) const PAGE_PREFIX_SIZE: usize = PAGE_TAG_SIZE + PAGE_SIZE_PREFIX_SIZE;

/// Maximum content/hashes size that can fit in a page.
pub(crate) const MAX_USABLE_PAGE_SIZE: usize = MAX_PAGE_SIZE - PAGE_PREFIX_SIZE;
51
52#[cfg(feature = "alloc")]
53pub use encoding::{prepare_preimages, Page, V0ContentPage, V0HashPage};
54use tezos_smart_rollup_core::PREIMAGE_HASH_SIZE;
55
/// Hashes `content` into a preimage hash.
///
/// Content must be at most [`MAX_PAGE_SIZE`] bytes, otherwise
/// [`crypto::blake2b::Blake2bError::Other`] is returned.
#[cfg(feature = "alloc")]
pub fn make_preimage_hash(
    content: &[u8],
) -> Result<[u8; PREIMAGE_HASH_SIZE], crypto::blake2b::Blake2bError> {
    // Refuse anything larger than a single page.
    if content.len() > MAX_PAGE_SIZE {
        return Err(crypto::blake2b::Blake2bError::Other);
    }

    // The 32-byte digest fills everything after the first byte; the
    // leading byte stays 0 — presumably a version/tag byte, TODO confirm.
    let digest = crypto::blake2b::digest_256(content)?;
    let mut preimage_hash = [0u8; PREIMAGE_HASH_SIZE];
    preimage_hash[1..].copy_from_slice(&digest);
    Ok(preimage_hash)
}
72
73#[cfg(feature = "alloc")]
74mod encoding {
75    // TODO: <https://github.com/trilitech/tezedge/issues/17>
76    //       lint triggered by issue in encoding macro
77    #![allow(clippy::useless_format)]
78
79    use super::*;
80    use crate::dac::PreimageHash;
81    use tezos_data_encoding::enc::BinWriter;
82    use tezos_data_encoding::encoding::HasEncoding;
83    use tezos_data_encoding::nom::NomReader;
84    use tezos_smart_rollup_core::PREIMAGE_HASH_SIZE;
85
    /// A Dac page: either a leaf node of contents, or node of hashes.
    ///
    /// The `u8` encoding tag is the page's leading byte, matched against
    /// by [`SlicePage::try_from`] when decoding without allocation.
    #[derive(Debug, HasEncoding, NomReader, BinWriter)]
    #[encoding(tags = "u8")]
    pub enum Page {
        /// Content Page - see [`V0ContentPage`]
        #[encoding(tag = 0)]
        V0ContentPage(V0ContentPage),

        /// Preimage Hash Page - see [`V0HashPage`]
        #[encoding(tag = 1)]
        V0HashPage(V0HashPage),
    }
98
    /// Content page consisting of a dynamic number of bytes.
    ///
    /// Leaf node of the DAC Merkle tree; holds a chunk of the original
    /// payload, up to [`V0ContentPage::MAX_CONTENT_SIZE`] bytes.
    #[derive(Debug, HasEncoding, NomReader, BinWriter)]
    pub struct V0ContentPage {
        // Serialized with a dynamic (length-prefixed) encoding.
        #[encoding(dynamic, list)]
        contents: Vec<u8>,
    }
105
106    impl V0ContentPage {
107        /// Maximum size of content in each page.
108        pub const MAX_CONTENT_SIZE: usize = V0SliceContentPage::MAX_CONTENT_SIZE;
109
110        /// Serialize an input slice into a sequence of content pages.
111        pub fn new_pages(input: &[u8]) -> impl Iterator<Item = V0ContentPage> + '_ {
112            input
113                .chunks(Self::MAX_CONTENT_SIZE)
114                .map(Vec::from)
115                .map(|contents| V0ContentPage { contents })
116        }
117    }
118
119    impl AsRef<[u8]> for V0ContentPage {
120        fn as_ref(&self) -> &[u8] {
121            self.contents.as_slice()
122        }
123    }
124
    /// Hash page consisting of a dynamic number of [`PreimageHash`].
    ///
    /// Interior node of the DAC Merkle tree; each hash refers to a child
    /// page (either contents or hashes).
    #[derive(Debug, HasEncoding, NomReader, BinWriter)]
    pub struct V0HashPage {
        // Serialized with a dynamic (length-prefixed) encoding.
        #[encoding(dynamic, list)]
        hashes: Vec<PreimageHash>,
    }
131
132    impl V0HashPage {
133        /// Maximum number of hashes able to fit into a hash page.
134        pub const MAX_HASHES_PER_PAGE: usize = V0SliceHashPage::MAX_HASHES_PER_PAGE;
135
136        /// The list of hashes.
137        pub fn hashes(&self) -> &[PreimageHash] {
138            self.hashes.as_slice()
139        }
140
141        /// Serialize a hashes slice into a sequence of hash pages.
142        pub fn new_pages(
143            hashes: &[[u8; PREIMAGE_HASH_SIZE]],
144        ) -> impl Iterator<Item = V0HashPage> + '_ {
145            hashes
146                .chunks(Self::MAX_HASHES_PER_PAGE)
147                .map(|hashes| V0HashPage {
148                    hashes: hashes.iter().map(PreimageHash::from).collect(),
149                })
150        }
151    }
152
153    /// Generates the preimages of the given content.
154    pub fn prepare_preimages(
155        content: &[u8],
156        mut handle: impl FnMut(PreimageHash, Vec<u8>),
157    ) -> Result<PreimageHash, crypto::blake2b::Blake2bError> {
158        let mut hashes = Vec::new();
159
160        for chunk in content.chunks(V0ContentPage::MAX_CONTENT_SIZE) {
161            let page = Page::V0ContentPage(V0ContentPage {
162                contents: chunk.to_vec(),
163            });
164            let mut encoded = Vec::new();
165            page.bin_write(&mut encoded).unwrap();
166
167            let hash: Vec<u8> = make_preimage_hash(&encoded)?.into();
168            let hash = PreimageHash::from(hash);
169
170            hashes.push(hash.clone());
171
172            handle(hash, encoded);
173        }
174
175        while hashes.len() > 1 {
176            let curr_hashes = hashes;
177            hashes = Vec::new();
178
179            for hash_page in curr_hashes.chunks(V0HashPage::MAX_HASHES_PER_PAGE) {
180                let hash_page = Page::V0HashPage(V0HashPage {
181                    hashes: hash_page.to_vec(),
182                });
183                let mut encoded = Vec::new();
184                hash_page.bin_write(&mut encoded).unwrap();
185
186                let hash: Vec<u8> = make_preimage_hash(&encoded)?.into();
187                let hash = PreimageHash::from(hash);
188
189                hashes.push(hash.clone());
190
191                handle(hash, encoded);
192            }
193        }
194
195        Ok(hashes.remove(0))
196    }
197}
198
/// Errors that may occur when dealing with [SlicePage].
#[derive(Debug)]
pub enum SlicePageError {
    /// Unknown page tag.
    ///
    /// Carries the first byte of the buffer, or `None` if it was empty.
    InvalidTag(Option<u8>),
    /// Invalid size prefix.
    ///
    /// The buffer is too short for the declared size, or the size is
    /// otherwise inconsistent (e.g. not a whole number of hashes).
    InvalidSizePrefix,
}
207
/// A Dac [Page] that borrows the underlying buffer.
///
/// Can be used in `no_std` & `alloc`-free environments.
#[derive(Debug)]
pub enum SlicePage<'a> {
    /// Contents of borrowed bytes.
    V0ContentPage(V0SliceContentPage<'a>),
    /// Contents of borrowed hashes.
    V0HashPage(V0SliceHashPage<'a>),
}
218
219impl<'a> TryFrom<&'a [u8]> for SlicePage<'a> {
220    type Error = SlicePageError;
221
222    fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
223        match value {
224            [0, rest @ ..] => {
225                Ok(SlicePage::V0ContentPage(V0SliceContentPage::parse(rest)?))
226            }
227            [1, rest @ ..] => Ok(SlicePage::V0HashPage(V0SliceHashPage::parse(rest)?)),
228            _ => Err(SlicePageError::InvalidTag(value.first().cloned())),
229        }
230    }
231}
232
/// Borrowing version of [V0ContentPage].
#[derive(Debug)]
pub struct V0SliceContentPage<'a> {
    // Content bytes borrowed from the page buffer, preamble stripped.
    inner: &'a [u8],
}
238
239impl<'a> V0SliceContentPage<'a> {
240    /// Maximum size of content in each page.
241    pub const MAX_CONTENT_SIZE: usize = MAX_USABLE_PAGE_SIZE;
242
243    // Assumes magic byte has been discarded
244    fn parse(slice: &'a [u8]) -> Result<Self, SlicePageError> {
245        if slice.len() < 4 {
246            return Err(SlicePageError::InvalidSizePrefix);
247        }
248
249        let size = u32::from_be_bytes([slice[0], slice[1], slice[2], slice[3]]) as usize;
250
251        let end_offset = 4 + size;
252
253        if slice.len() < end_offset {
254            return Err(SlicePageError::InvalidSizePrefix);
255        }
256
257        Ok(Self {
258            inner: &slice[4..end_offset],
259        })
260    }
261}
262
impl<'a> AsRef<[u8]> for V0SliceContentPage<'a> {
    // Returns the content with the page buffer's full lifetime `'a`,
    // not merely the lifetime of `&self`.
    fn as_ref(&self) -> &'a [u8] {
        self.inner
    }
}
268
/// Borrowing version of [V0HashPage].
#[derive(Debug)]
pub struct V0SliceHashPage<'a> {
    // Guaranteed to be a multiple of PREIMAGE_HASH_SIZE
    // (enforced by `parse`).
    pub(crate) inner: &'a [u8],
}
275
276impl<'a> V0SliceHashPage<'a> {
277    /// Maximum number of hashes able to fit into a hash page.
278    pub const MAX_HASHES_PER_PAGE: usize = MAX_USABLE_PAGE_SIZE / PREIMAGE_HASH_SIZE;
279
280    /// Returns an iterator over the preimage hashes contained within.
281    pub fn hashes(&self) -> impl Iterator<Item = &'a [u8; PREIMAGE_HASH_SIZE]> {
282        // there is a nightly(only) API called `as_chunks` that would return
283        // `(&[[u8; PREIMAPREIMAGE_HASH_SIZE]], &[u8])` that we could use in
284        // future
285        self.inner
286            .chunks_exact(PREIMAGE_HASH_SIZE)
287            .map(|chunk| chunk.try_into().expect("Guaranteed to be exact."))
288    }
289
290    // Assumes magic byte has been discarded
291    fn parse(slice: &'a [u8]) -> Result<Self, SlicePageError> {
292        if slice.len() < 4 {
293            return Err(SlicePageError::InvalidSizePrefix);
294        }
295
296        let size = u32::from_be_bytes([slice[0], slice[1], slice[2], slice[3]]) as usize;
297
298        let end_offset = 4 + size; // for prefix bytes
299
300        if slice.len() < end_offset || size % PREIMAGE_HASH_SIZE != 0 {
301            return Err(SlicePageError::InvalidSizePrefix);
302        }
303
304        Ok(Self {
305            inner: &slice[4..end_offset],
306        })
307    }
308}
309
310/// Fetches a page of data from file `hash` into `buffer` using [Runtime::reveal_preimage].
311/// Returns a tuple of the raw page and remaining buffer.
312pub fn fetch_page_raw<'a, Host: Runtime>(
313    host: &Host,
314    hash: &[u8; PREIMAGE_HASH_SIZE],
315    buffer: &'a mut [u8],
316) -> Result<(&'a [u8], &'a mut [u8]), RuntimeError> {
317    let size = Runtime::reveal_preimage(host, hash, buffer)?;
318    let (page, rest) = buffer.split_at_mut(size);
319
320    Ok((page, rest))
321}
322
323/// Recursively traverses a Merkle Tree of hashes up to `max_dac_levels` depth where each hash
324/// corresponds to a preimage that can be revealed via [Runtime::reveal_preimage]. The closure
325/// `save_content` is applied on each content page found.
326///
327/// N.B `max_dac_levels`, should probably be kept under 3 (3 gives 59MB of data). You are likely,
328/// however, to run into tick-limit issues while doing so, depending on what `save_content` does.
329///
330/// Instead, it is recommended to use `DacCertificate::reveal_to_store` where possible.
331pub fn reveal_loop<Host: Runtime>(
332    host: &mut Host,
333    level: usize,
334    hash: &[u8; PREIMAGE_HASH_SIZE],
335    rest: &mut [u8],
336    max_dac_levels: usize,
337    save_content: &mut impl FnMut(&mut Host, V0SliceContentPage) -> Result<(), &'static str>,
338) -> Result<(), &'static str> {
339    if level >= max_dac_levels {
340        return Err("DAC preimage tree contains too many levels.");
341    }
342
343    let (page, rest) =
344        fetch_page_raw(host, hash, rest).map_err(|_| "Failed to retrieve preimage")?;
345
346    let page = SlicePage::try_from(page)
347        .map_err(|_| "Unable to decode DAC page: Decode into SlicePage failed")?;
348    match page {
349        SlicePage::V0HashPage(hashes) => {
350            for hash in hashes.hashes() {
351                reveal_loop(host, level + 1, hash, rest, max_dac_levels, save_content)?;
352            }
353        }
354        SlicePage::V0ContentPage(content) => save_content(host, content)?,
355    }
356
357    Ok(())
358}
359
#[cfg(test)]
mod tests {
    use super::*;
    use tezos_data_encoding::enc::BinWriter;
    use tezos_data_encoding::nom::NomReader;

    // taken from DAC test example in tezos
    const EXAMPLE_CONTENT_PAGE: &[u8] = &[
        0, 0, 0, 0, b'A', b'L', b'o', b'r', b'e', b'm', b' ', b'i', b'p', b's', b'u',
        b'm', b' ', b'd', b'o', b'l', b'o', b'r', b' ', b's', b'i', b't', b' ', b'a',
        b'm', b'e', b't', b',', b' ', b'c', b'o', b'n', b's', b'e', b'c', b't', b'e',
        b't', b'u', b'r', b' ', b'a', b'd', b'i', b'p', b'i', b's', b'c', b'i', b'n',
        b'g', b' ', b'e', b'l', b'i', b't', b',', b' ', b's', b'e', b'd', b' ', b'd',
        b'o', b' ', b'e',
    ];

    // taken from DAC test example in tezos
    const EXAMPLE_HASH_PAGE: &[u8] = &[
        1, 0, 0, 0, b'B', 0, b'r', 180, b'a', b'2', b'Z', b'(', 220, 14, 4, 220, b'{',
        b'N', b'n', b'@', 183, b'#', b'!', 6, b'm', 204, b'p', 130, 162, 247, 246, 16,
        b'l', 239, b'7', b'"', 249, 163, 0, 155, 167, 146, 19, 175, 28, b'V', 129, 247,
        208, 31, b'F', b'd', 183, 194, 149, b'H', 163, b'|', 246, 164, 201, b'&', 195,
        129, 24, 3, b'}', b'4', b't', 11, 213,
    ];

    // Round-trip: decoding then re-encoding a hash page yields the
    // original bytes.
    #[test]
    fn encode_decode_hash_page() {
        let (_, page) =
            Page::nom_read(EXAMPLE_HASH_PAGE).expect("Deserialization should work");
        let mut buffer = Vec::new();
        page.bin_write(&mut buffer)
            .expect("Serialization should work");
        assert_eq!(buffer.as_slice(), EXAMPLE_HASH_PAGE);
    }

    // Round-trip: decoding then re-encoding a contents page yields the
    // original bytes.
    #[test]
    fn encode_decode_contents_page() {
        let (_, page) =
            Page::nom_read(EXAMPLE_CONTENT_PAGE).expect("Deserialization should work");
        let mut buffer = Vec::new();
        page.bin_write(&mut buffer)
            .expect("Serialization should work");
        assert_eq!(buffer.as_slice(), EXAMPLE_CONTENT_PAGE);
    }

    // The allocating and borrowing decoders agree on a contents page.
    #[test]
    fn decoding_contents_over_slice() {
        let (_, page) =
            Page::nom_read(EXAMPLE_CONTENT_PAGE).expect("Deserialization should work");

        let slice_page =
            SlicePage::try_from(EXAMPLE_CONTENT_PAGE).expect("Should be content page");

        match (&page, &slice_page) {
            (Page::V0ContentPage(page), SlicePage::V0ContentPage(slice_page)) => {
                assert_eq!(page.as_ref(), slice_page.inner)
            }
            _ => panic!(
                "Should be content pages, got: {:?} & {:?}",
                page, slice_page
            ),
        }
    }

    // The allocating and borrowing decoders agree on a hash page.
    #[test]
    fn decoding_hash_over_slice() {
        let (_, page) =
            Page::nom_read(EXAMPLE_HASH_PAGE).expect("Deserialization should work");

        let slice_page =
            SlicePage::try_from(EXAMPLE_HASH_PAGE).expect("Should be hash page");

        match (&page, &slice_page) {
            (Page::V0HashPage(page), SlicePage::V0HashPage(slice_page)) => {
                let hashes: Vec<&[u8; PREIMAGE_HASH_SIZE]> =
                    page.hashes().iter().map(|hash| hash.as_ref()).collect();
                let slice_hashes: Vec<&[u8; PREIMAGE_HASH_SIZE]> =
                    slice_page.hashes().collect();

                assert_eq!(hashes, slice_hashes);
            }
            // Fixed: this message previously said "content pages" even
            // though this test expects hash pages.
            _ => panic!(
                "Should be hash pages, got: {:?} & {:?}",
                page, slice_page
            ),
        }
    }
}
447}