nydus_utils/
verity.rs

1// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved.
2//
3// SPDX-License-Identifier: Apache-2.0
4
5//! Utilities to generate Merkle trees for data integrity verification.
6
7use std::fs::File;
8use std::io::Result;
9use std::mem::size_of;
10use std::sync::Mutex;
11
12use crate::digest::{Algorithm, DigestData, RafsDigest};
13use crate::div_round_up;
14use crate::filemap::FileMapState;
15
16const NON_EXIST_ENTRY_DIGEST: RafsDigest = RafsDigest {
17    data: [
18        173, 127, 172, 178, 88, 111, 198, 233, 102, 192, 4, 215, 209, 209, 107, 2, 79, 88, 5, 255,
19        124, 180, 124, 122, 133, 218, 189, 139, 72, 137, 44, 167,
20    ],
21};
22
23/// Struct to maintain and compute Merkle Tree topology and layout.
24pub struct MerkleTree {
25    digest_algo: Algorithm,
26    digest_per_page: u32,
27    digest_size: usize,
28    data_pages: u32,
29    page_size: u32,
30    max_levels: u32,
31}
32
33impl MerkleTree {
34    /// Create a new instance of `MerkleTree`.
35    pub fn new(page_size: u32, data_pages: u32, digest_algo: Algorithm) -> Self {
36        assert_eq!(page_size, 4096);
37        assert_eq!(digest_algo, Algorithm::Sha256);
38        let digest_size = 32;
39        let digest_shift = u32::trailing_zeros(page_size / digest_size);
40        let digest_per_page = 1u32 << digest_shift;
41
42        let mut max_levels = 0;
43        let mut tmp_pages = data_pages as u64;
44        while tmp_pages > 1 {
45            tmp_pages = div_round_up(tmp_pages, digest_per_page as u64);
46            max_levels += 1;
47        }
48
49        MerkleTree {
50            digest_algo,
51            digest_per_page: 1 << digest_shift,
52            digest_size: digest_size as usize,
53            page_size,
54            data_pages,
55            max_levels,
56        }
57    }
58
59    /// Get digest algorithm used to generate the Merkle tree.
60    pub fn digest_algorithm(&self) -> Algorithm {
61        self.digest_algo
62    }
63
64    /// Get height of the Merkle tree, 0 means there is only a root digest for one data page.
65    pub fn max_levels(&self) -> u32 {
66        self.max_levels
67    }
68
69    /// Get number of pages to store digest at specified Merkle tree level.
70    pub fn level_pages(&self, mut level: u32) -> u32 {
71        if level > self.max_levels {
72            0
73        } else {
74            let mut pages = self.data_pages as u64;
75            while level > 0 && pages > 0 {
76                pages = div_round_up(pages, self.digest_per_page as u64);
77                level -= 1;
78            }
79            pages as u32
80        }
81    }
82
83    /// Get number of digest entries at specified Merkle tree level.
84    pub fn level_entries(&self, level: u32) -> u32 {
85        if self.data_pages == 0 || level > self.max_levels {
86            0
87        } else {
88            self.level_index(level, self.data_pages - 1) + 1
89        }
90    }
91
92    /// Get entry index at the specified level covering the data page with index `page_index`.
93    pub fn level_index(&self, mut level: u32, mut page_index: u32) -> u32 {
94        if level <= 1 {
95            page_index
96        } else {
97            level -= 1;
98            while level > 0 {
99                page_index /= self.digest_per_page;
100                level -= 1;
101            }
102            page_index
103        }
104    }
105
106    /// Get base position of digest array for the specified Merkle tree level.
107    pub fn level_base(&self, level: u32) -> u64 {
108        if level >= self.max_levels {
109            0
110        } else {
111            let mut offset = 0;
112            let mut curr = self.max_levels;
113            while curr > level {
114                let pages = self.level_pages(curr);
115                offset += pages as u64 * self.page_size as u64;
116                curr -= 1;
117            }
118            offset
119        }
120    }
121
122    /// Get total pages needed to store the Merkle Tree.
123    pub fn total_pages(&self) -> u32 {
124        let mut pages = 0;
125        for idx in 1..=self.max_levels {
126            pages += self.level_pages(idx);
127        }
128        pages
129    }
130}
131
132/// Merkle tree generator for data integrity verification.
133pub struct VerityGenerator {
134    mkl_tree: MerkleTree,
135    file_map: Mutex<FileMapState>,
136    root_digest: RafsDigest,
137}
138
139impl VerityGenerator {
140    /// Create a new instance [VerityGenerator].
141    pub fn new(file: File, offset: u64, data_pages: u32) -> Result<Self> {
142        let mkl_tree = MerkleTree::new(4096, data_pages, Algorithm::Sha256);
143        let total_size = mkl_tree.total_pages() as usize * 4096;
144        let file_map = if data_pages > 1 {
145            if offset.checked_add(total_size as u64).is_none() {
146                return Err(einval!(format!(
147                    "verity data offset 0x{:x} and size 0x{:x} is too big",
148                    offset, total_size
149                )));
150            }
151
152            let md = file.metadata()?;
153            if md.len() < total_size as u64 + offset {
154                file.set_len(total_size as u64 + offset)?;
155            }
156            FileMapState::new(file, offset as libc::off_t, total_size, true)?
157        } else {
158            FileMapState::default()
159        };
160
161        Ok(VerityGenerator {
162            mkl_tree,
163            file_map: Mutex::new(file_map),
164            root_digest: NON_EXIST_ENTRY_DIGEST,
165        })
166    }
167
168    /// Initialize all digest values.
169    pub fn initialize(&mut self) -> Result<()> {
170        let total_size = self.mkl_tree.total_pages() as usize * 4096;
171        let mut offset = 0;
172        let mut map = self.file_map.lock().unwrap();
173
174        while offset < total_size {
175            let digest = map.get_mut::<DigestData>(offset)?;
176            digest.copy_from_slice(&NON_EXIST_ENTRY_DIGEST.data);
177            offset += size_of::<DigestData>();
178        }
179
180        Ok(())
181    }
182
183    /// Set digest value for Merkle entry at `level` with `index`.
184    ///
185    /// Digests for data pages must be set by calling this method. It can also be used to set
186    /// digest values for intermediate digest pages.
187    pub fn set_digest(&mut self, level: u32, index: u32, digest: &[u8]) -> Result<()> {
188        let digest_size = self.mkl_tree.digest_size;
189        if digest.len() != digest_size {
190            return Err(einval!(format!(
191                "size of digest data is not {}",
192                digest_size
193            )));
194        }
195
196        // Handle special case of zero-level Merkle tree.
197        if self.mkl_tree.data_pages == 1 && level == 1 && index == 0 {
198            self.root_digest.data.copy_from_slice(digest);
199            return Ok(());
200        }
201
202        if level > self.mkl_tree.max_levels() || level == 0 {
203            return Err(einval!(format!(
204                "level {} is out of range, max {}",
205                level,
206                self.mkl_tree.max_levels()
207            )));
208        } else if index >= self.mkl_tree.level_entries(level) {
209            return Err(einval!(format!(
210                "index {} is out of range, max {}",
211                index,
212                self.mkl_tree.level_entries(level) - 1
213            )));
214        }
215
216        let base = self.mkl_tree.level_base(level) as usize;
217        let offset = base + index as usize * digest_size;
218        let mut guard = self.file_map.lock().unwrap();
219        let buf = guard.get_mut::<DigestData>(offset)?;
220        buf.copy_from_slice(digest);
221
222        Ok(())
223    }
224
225    /// Generate digest values from lower level digest pages.
226    pub fn generate_level_digests(&mut self, level: u32) -> Result<()> {
227        assert!(level > 1 && level <= self.mkl_tree.max_levels);
228        let page_size = self.mkl_tree.page_size as usize;
229        let count = self.mkl_tree.level_entries(level) as usize;
230        let mut digest_base = self.mkl_tree.level_base(level) as usize;
231        let mut data_base = self.mkl_tree.level_base(level - 1) as usize;
232        let mut guard = self.file_map.lock().unwrap();
233
234        for _ in 0..count {
235            let data = guard.get_slice::<u8>(data_base, page_size)?;
236            let digest = RafsDigest::from_buf(data, self.mkl_tree.digest_algo);
237            let buf = guard.get_mut::<DigestData>(digest_base)?;
238            buf.copy_from_slice(digest.as_ref());
239            data_base += page_size;
240            digest_base += self.mkl_tree.digest_size;
241        }
242
243        Ok(())
244    }
245
246    /// Generate Merkle root digest.
247    ///
248    /// The returned Merkle tree root digest will be:
249    /// - `NON_EXIST_ENTRY_DIGEST` if there's no data page
250    /// - digest of the data page if there's only one data page
251    /// - digest of the intermediate digest page if there's more than one data pages
252    pub fn generate_root_digest(&mut self) -> Result<RafsDigest> {
253        if self.mkl_tree.max_levels == 0 {
254            Ok(self.root_digest)
255        } else {
256            let guard = self.file_map.lock().unwrap();
257            let data = guard.get_slice::<u8>(0, self.mkl_tree.page_size as usize)?;
258            Ok(RafsDigest::from_buf(data, self.mkl_tree.digest_algo))
259        }
260    }
261
262    /// Generate all intermediate and root digests for the Merkle tree.
263    ///
264    /// Digests for data pages at level 1 must be set up by calling [set_digest()] before this
265    /// function to generate intermediate and root digests.
266    pub fn generate_all_digests(&mut self) -> Result<RafsDigest> {
267        for level in 2..=self.mkl_tree.max_levels {
268            self.generate_level_digests(level)?;
269        }
270        self.generate_root_digest()
271    }
272}
273
274#[cfg(test)]
275mod tests {
276    use super::*;
277    use vmm_sys_util::tempfile::TempFile;
278
279    #[test]
280    fn test_max_levels() {
281        let mkl = MerkleTree::new(4096, 1, Algorithm::Sha256);
282        assert_eq!(mkl.max_levels(), 0);
283        assert_eq!(mkl.level_pages(0), 1);
284        assert_eq!(mkl.level_pages(1), 0);
285        assert_eq!(mkl.level_base(0), 0);
286        assert_eq!(mkl.level_base(1), 0);
287        assert_eq!(mkl.level_entries(0), 1);
288        assert_eq!(mkl.level_entries(1), 0);
289        assert_eq!(mkl.total_pages(), 0);
290
291        let mkl = MerkleTree::new(4096, 2, Algorithm::Sha256);
292        assert_eq!(mkl.max_levels(), 1);
293        assert_eq!(mkl.level_pages(0), 2);
294        assert_eq!(mkl.level_pages(1), 1);
295        assert_eq!(mkl.level_pages(2), 0);
296        assert_eq!(mkl.level_entries(0), 2);
297        assert_eq!(mkl.level_entries(1), 2);
298        assert_eq!(mkl.level_entries(2), 0);
299        assert_eq!(mkl.level_base(0), 4096);
300        assert_eq!(mkl.level_base(1), 0);
301        assert_eq!(mkl.level_base(2), 0);
302        assert_eq!(mkl.total_pages(), 1);
303
304        let mkl = MerkleTree::new(4096, 128, Algorithm::Sha256);
305        assert_eq!(mkl.max_levels(), 1);
306        assert_eq!(mkl.level_pages(0), 128);
307        assert_eq!(mkl.level_pages(1), 1);
308        assert_eq!(mkl.level_pages(2), 0);
309        assert_eq!(mkl.level_entries(0), 128);
310        assert_eq!(mkl.level_entries(1), 128);
311        assert_eq!(mkl.level_entries(2), 0);
312        assert_eq!(mkl.level_base(0), 4096);
313        assert_eq!(mkl.level_base(1), 0);
314        assert_eq!(mkl.level_base(2), 0);
315        assert_eq!(mkl.total_pages(), 1);
316
317        let mkl = MerkleTree::new(4096, 129, Algorithm::Sha256);
318        assert_eq!(mkl.max_levels(), 2);
319        assert_eq!(mkl.level_pages(0), 129);
320        assert_eq!(mkl.level_pages(1), 2);
321        assert_eq!(mkl.level_pages(2), 1);
322        assert_eq!(mkl.level_pages(3), 0);
323        assert_eq!(mkl.level_entries(0), 129);
324        assert_eq!(mkl.level_entries(1), 129);
325        assert_eq!(mkl.level_entries(2), 2);
326        assert_eq!(mkl.level_entries(3), 0);
327        assert_eq!(mkl.level_base(0), 4096 * 3);
328        assert_eq!(mkl.level_base(1), 4096);
329        assert_eq!(mkl.level_base(2), 0);
330        assert_eq!(mkl.level_base(3), 0);
331        assert_eq!(mkl.total_pages(), 3);
332
333        let mkl = MerkleTree::new(4096, 128 * 128, Algorithm::Sha256);
334        assert_eq!(mkl.max_levels(), 2);
335        assert_eq!(mkl.level_pages(0), 128 * 128);
336        assert_eq!(mkl.level_pages(1), 128);
337        assert_eq!(mkl.level_pages(2), 1);
338        assert_eq!(mkl.level_pages(3), 0);
339        assert_eq!(mkl.level_base(0), 4096 * 129);
340        assert_eq!(mkl.level_base(1), 4096);
341        assert_eq!(mkl.level_base(2), 0);
342        assert_eq!(mkl.level_base(3), 0);
343        assert_eq!(mkl.total_pages(), 129);
344
345        let mkl = MerkleTree::new(4096, 128 * 128 + 1, Algorithm::Sha256);
346        assert_eq!(mkl.max_levels(), 3);
347        assert_eq!(mkl.level_pages(0), 128 * 128 + 1);
348        assert_eq!(mkl.level_pages(1), 129);
349        assert_eq!(mkl.level_pages(2), 2);
350        assert_eq!(mkl.level_pages(3), 1);
351        assert_eq!(mkl.level_pages(4), 0);
352        assert_eq!(mkl.level_entries(0), 128 * 128 + 1);
353        assert_eq!(mkl.level_entries(1), 128 * 128 + 1);
354        assert_eq!(mkl.level_entries(2), 129);
355        assert_eq!(mkl.level_entries(3), 2);
356        assert_eq!(mkl.level_entries(4), 0);
357        assert_eq!(mkl.level_base(0), 4096 * 132);
358        assert_eq!(mkl.level_base(1), 4096 * 3);
359        assert_eq!(mkl.level_base(2), 4096);
360        assert_eq!(mkl.level_base(3), 0);
361        assert_eq!(mkl.level_base(4), 0);
362        assert_eq!(mkl.total_pages(), 132);
363
364        let mkl = MerkleTree::new(4096, u32::MAX, Algorithm::Sha256);
365        assert_eq!(mkl.max_levels(), 5);
366    }
367
368    #[test]
369    fn test_generate_mkl_tree_zero_entry() {
370        let digest = RafsDigest::from_buf(&[0u8; 4096], Algorithm::Sha256);
371        assert_eq!(digest, NON_EXIST_ENTRY_DIGEST);
372
373        let file = TempFile::new().unwrap();
374        let mut generator = VerityGenerator::new(file.into_file(), 0, 0).unwrap();
375
376        assert!(generator
377            .set_digest(0, 0, &NON_EXIST_ENTRY_DIGEST.data)
378            .is_err());
379        assert!(generator
380            .set_digest(1, 0, &NON_EXIST_ENTRY_DIGEST.data)
381            .is_err());
382
383        let root_digest = generator.generate_all_digests().unwrap();
384        assert_eq!(root_digest, NON_EXIST_ENTRY_DIGEST);
385    }
386
387    #[test]
388    fn test_generate_mkl_tree_one_entry() {
389        let file = TempFile::new().unwrap();
390        let mut generator = VerityGenerator::new(file.into_file(), 0, 1).unwrap();
391
392        let digest = RafsDigest::from_buf(&[1u8; 4096], Algorithm::Sha256);
393        assert!(generator.set_digest(0, 0, &digest.data).is_err());
394        assert!(generator.set_digest(2, 0, &digest.data).is_err());
395        assert!(generator.set_digest(1, 1, &digest.data).is_err());
396        generator.set_digest(1, 0, &digest.data).unwrap();
397
398        let root_digest = generator.generate_all_digests().unwrap();
399        assert_eq!(root_digest, digest);
400    }
401
402    #[test]
403    fn test_generate_mkl_tree_two_entries() {
404        let file = TempFile::new().unwrap();
405        let mut generator = VerityGenerator::new(file.into_file(), 0, 2).unwrap();
406
407        let digest = RafsDigest::from_buf(&[1u8; 4096], Algorithm::Sha256);
408        assert!(generator.set_digest(0, 0, &digest.data).is_err());
409        assert!(generator.set_digest(2, 0, &digest.data).is_err());
410        assert!(generator.set_digest(1, 2, &digest.data).is_err());
411        generator.set_digest(1, 0, &digest.data).unwrap();
412        generator.set_digest(1, 1, &digest.data).unwrap();
413
414        let root_digest = generator.generate_all_digests().unwrap();
415        assert_ne!(root_digest, digest);
416    }
417
418    #[test]
419    fn test_generate_mkl_tree_4097_entries() {
420        let file = TempFile::new().unwrap();
421        let mut generator = VerityGenerator::new(file.into_file(), 0, 4097).unwrap();
422
423        let digest = RafsDigest::from_buf(&[1u8; 4096], Algorithm::Sha256);
424        assert!(generator.set_digest(0, 0, &digest.data).is_err());
425        generator.set_digest(2, 0, &digest.data).unwrap();
426        for idx in 0..4097 {
427            generator.set_digest(1, idx, &digest.data).unwrap();
428        }
429
430        let root_digest = generator.generate_all_digests().unwrap();
431        assert_ne!(root_digest, digest);
432        assert_eq!(generator.mkl_tree.max_levels, 2);
433    }
434
435    #[test]
436    fn test_merkle_tree_digest_algo() {
437        let mkl = MerkleTree::new(4096, 1, Algorithm::Sha256);
438        assert_eq!(mkl.digest_algorithm(), Algorithm::Sha256);
439    }
440
441    #[test]
442    fn test_verity_generator_error() {
443        let file = TempFile::new().unwrap();
444        assert!(VerityGenerator::new(file.into_file(), u64::MAX, u32::MAX).is_err());
445
446        let file = TempFile::new().unwrap();
447        let mut generator = VerityGenerator::new(file.into_file(), 0, 4097).unwrap();
448        assert!(generator.set_digest(1, 0, &[1u8; 64]).is_err());
449    }
450
451    #[test]
452    fn test_verity_initialize() {
453        let file = TempFile::new().unwrap();
454        let mut generator = VerityGenerator::new(file.into_file(), 0, 4097).unwrap();
455        assert!(generator.initialize().is_ok());
456    }
457}