// nydus_builder/chunkdict_generator.rs

// Copyright (C) 2023 Nydus Developers. All rights reserved.
//
// SPDX-License-Identifier: Apache-2.0

//! Generate Chunkdict RAFS bootstrap.
//! -------------------------------------------------------------------------------------------------
//! Bug 1: Inconsistent Chunk Size Leading to Blob Size Less Than 4K(v6_block_size)
//! Description: The size of chunks is not consistent, which results in the possibility that a blob,
//! composed of a group of these chunks, may be less than 4K(v6_block_size) in size.
//! This inconsistency leads to a failure in passing the size check.
//! -------------------------------------------------------------------------------------------------
//! Bug 2: Incorrect Chunk Number Calculation Due to Premature Check Logic
//! Description: The current logic for calculating the chunk number is based on the formula size/chunk size.
//! However, this approach is flawed as it precedes the actual check which accounts for chunk statistics.
//! Consequently, this leads to inaccurate counting of chunk numbers.

17use super::core::node::{ChunkSource, NodeInfo};
18use super::{BlobManager, Bootstrap, BootstrapManager, BuildContext, BuildOutput, Tree};
19use crate::core::node::Node;
20use crate::NodeChunk;
21use crate::OsString;
22use anyhow::{Ok, Result};
23use nydus_rafs::metadata::chunk::ChunkWrapper;
24use nydus_rafs::metadata::inode::InodeWrapper;
25use nydus_rafs::metadata::layout::RafsXAttrs;
26use nydus_storage::meta::BlobChunkInfoV1Ondisk;
27use nydus_utils::compress::Algorithm;
28use nydus_utils::digest::RafsDigest;
29
30use std::mem::size_of;
31use std::path::PathBuf;
32use std::str::FromStr;
33use std::sync::Arc;
34
/// Metadata describing one chunk in a chunk dictionary.
///
/// Carries both compressed and uncompressed size/offset so the chunk can be
/// re-described in a generated bootstrap without reading the blob data itself.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ChunkdictChunkInfo {
    /// Source image of this record — presumably an image reference string; confirm against the producer.
    pub image_reference: String,
    /// Version string of the record's source — exact semantics not visible in this file.
    pub version: String,
    /// Id of the blob that holds this chunk's data.
    pub chunk_blob_id: String,
    /// Content digest of the chunk, stored as a string (parsed via `RafsDigest::from_string`).
    pub chunk_digest: String,
    /// CRC32 checksum of the chunk data.
    pub chunk_crc32: u32,
    /// Size of the chunk after compression, in bytes.
    pub chunk_compressed_size: u32,
    /// Size of the chunk before compression, in bytes.
    pub chunk_uncompressed_size: u32,
    /// Offset of the compressed chunk within its blob, in bytes.
    pub chunk_compressed_offset: u64,
    /// Offset of the chunk within the uncompressed blob, in bytes.
    pub chunk_uncompressed_offset: u64,
}
47
/// Metadata describing one blob referenced by the chunk dictionary.
///
/// Derives `Debug`/`Clone` for consistency with [`ChunkdictChunkInfo`]
/// (the original declaration had no derives, so the type could neither be
/// debug-logged nor duplicated).
#[derive(Debug, Clone)]
pub struct ChunkdictBlobInfo {
    /// Id of the blob (matched against `ChunkdictChunkInfo::chunk_blob_id`).
    pub blob_id: String,
    /// Total compressed size of the blob, in bytes.
    pub blob_compressed_size: u64,
    /// Total uncompressed size of the blob, in bytes.
    pub blob_uncompressed_size: u64,
    /// Compressor name, parsed with `Algorithm::from_str` when inserting chunks.
    pub blob_compressor: String,
    /// Compressed size of the blob's chunk-info (CI) metadata, in bytes.
    pub blob_meta_ci_compressed_size: u64,
    /// Uncompressed size of the blob's chunk-info (CI) metadata, in bytes.
    pub blob_meta_ci_uncompressed_size: u64,
    /// Offset of the blob's chunk-info (CI) metadata within the blob, in bytes.
    pub blob_meta_ci_offset: u64,
}
57
/// Struct to generate chunkdict RAFS bootstrap.
///
/// Stateless marker type: all functionality lives in associated functions
/// (see `Generator::generate`), so no instance is ever constructed here.
pub struct Generator {}
60
61impl Generator {
62    // Generate chunkdict RAFS bootstrap.
63    pub fn generate(
64        ctx: &mut BuildContext,
65        bootstrap_mgr: &mut BootstrapManager,
66        blob_mgr: &mut BlobManager,
67        chunkdict_chunks_origin: Vec<ChunkdictChunkInfo>,
68        chunkdict_blobs: Vec<ChunkdictBlobInfo>,
69    ) -> Result<BuildOutput> {
70        // Validate and remove chunks whose belonged blob sizes are smaller than a block.
71        let mut chunkdict_chunks = chunkdict_chunks_origin.to_vec();
72        Self::validate_and_remove_chunks(ctx, &mut chunkdict_chunks);
73        // Build root tree.
74        let mut tree = Self::build_root_tree(ctx)?;
75
76        // Build child tree.
77        let child = Self::build_child_tree(ctx, blob_mgr, &chunkdict_chunks, &chunkdict_blobs)?;
78        let result = vec![child];
79        tree.children = result;
80
81        Self::validate_tree(&tree)?;
82
83        // Build bootstrap.
84        let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?;
85        let mut bootstrap = Bootstrap::new(tree)?;
86        bootstrap.build(ctx, &mut bootstrap_ctx)?;
87
88        let blob_table = blob_mgr.to_blob_table(ctx)?;
89        let storage = &mut bootstrap_mgr.bootstrap_storage;
90        bootstrap.dump(ctx, storage, &mut bootstrap_ctx, &blob_table)?;
91
92        BuildOutput::new(blob_mgr, None, &bootstrap_mgr.bootstrap_storage, &None)
93    }
94
95    /// Validate tree.
96    fn validate_tree(tree: &Tree) -> Result<()> {
97        let pre = &mut |t: &Tree| -> Result<()> {
98            let node = t.borrow_mut_node();
99            debug!("chunkdict tree: ");
100            debug!("inode: {}", node);
101            for chunk in &node.chunks {
102                debug!("\t chunk: {}", chunk);
103            }
104            Ok(())
105        };
106        tree.walk_dfs_pre(pre)?;
107        debug!("chunkdict tree is valid.");
108        Ok(())
109    }
110
111    /// Validates and removes chunks with a total uncompressed size smaller than the block size limit.
112    fn validate_and_remove_chunks(ctx: &mut BuildContext, chunkdict: &mut Vec<ChunkdictChunkInfo>) {
113        let mut chunk_sizes = std::collections::HashMap::new();
114
115        // Accumulate the uncompressed size for each chunk_blob_id.
116        for chunk in chunkdict.iter() {
117            *chunk_sizes.entry(chunk.chunk_blob_id.clone()).or_insert(0) +=
118                chunk.chunk_uncompressed_size as u64;
119        }
120        // Find all chunk_blob_ids with a total uncompressed size > v6_block_size.
121        let small_chunks: Vec<String> = chunk_sizes
122            .into_iter()
123            .filter(|&(_, size)| size < ctx.v6_block_size())
124            .inspect(|(id, _)| {
125                eprintln!(
126                    "Warning: Blob with id '{}' is smaller than {} bytes.",
127                    id,
128                    ctx.v6_block_size()
129                )
130            })
131            .map(|(id, _)| id)
132            .collect();
133
134        // Retain only chunks with chunk_blob_id that has a total uncompressed size > v6_block_size.
135        chunkdict.retain(|chunk| !small_chunks.contains(&chunk.chunk_blob_id));
136    }
137
138    /// Build the root tree.
139    pub fn build_root_tree(ctx: &mut BuildContext) -> Result<Tree> {
140        let mut inode = InodeWrapper::new(ctx.fs_version);
141        inode.set_ino(1);
142        inode.set_uid(1000);
143        inode.set_gid(1000);
144        inode.set_projid(0);
145        inode.set_mode(0o660 | libc::S_IFDIR as u32);
146        inode.set_nlink(3);
147        inode.set_name_size("/".len());
148        inode.set_rdev(0);
149        inode.set_blocks(256);
150        let node_info = NodeInfo {
151            explicit_uidgid: true,
152            src_dev: 0,
153            src_ino: 0,
154            rdev: 0,
155            source: PathBuf::from("/"),
156            path: PathBuf::from("/"),
157            target: PathBuf::from("/"),
158            target_vec: vec![OsString::from("/")],
159            symlink: None,
160            xattrs: RafsXAttrs::default(),
161            v6_force_extended_inode: true,
162        };
163        let root_node = Node::new(inode, node_info, 0);
164        let tree = Tree::new(root_node);
165        Ok(tree)
166    }
167
168    /// Build the child tree.
169    fn build_child_tree(
170        ctx: &mut BuildContext,
171        blob_mgr: &mut BlobManager,
172        chunkdict_chunks: &[ChunkdictChunkInfo],
173        chunkdict_blobs: &[ChunkdictBlobInfo],
174    ) -> Result<Tree> {
175        let mut inode = InodeWrapper::new(ctx.fs_version);
176        inode.set_ino(2);
177        inode.set_uid(0);
178        inode.set_gid(0);
179        inode.set_projid(0);
180        inode.set_mode(0o660 | libc::S_IFREG as u32);
181        inode.set_nlink(1);
182        inode.set_name_size("chunkdict".len());
183        inode.set_rdev(0);
184        inode.set_blocks(256);
185        let node_info = NodeInfo {
186            explicit_uidgid: true,
187            src_dev: 0,
188            src_ino: 1,
189            rdev: 0,
190            source: PathBuf::from("/"),
191            path: PathBuf::from("/chunkdict"),
192            target: PathBuf::from("/chunkdict"),
193            target_vec: vec![OsString::from("/"), OsString::from("/chunkdict")],
194            symlink: None,
195            xattrs: RafsXAttrs::new(),
196            v6_force_extended_inode: true,
197        };
198        let mut node = Node::new(inode, node_info, 0);
199
200        // Insert chunks.
201        Self::insert_chunks(ctx, blob_mgr, &mut node, chunkdict_chunks, chunkdict_blobs)?;
202        let node_size: u64 = node
203            .chunks
204            .iter()
205            .map(|chunk| chunk.inner.uncompressed_size() as u64)
206            .sum();
207        node.inode.set_size(node_size);
208
209        // Update child count.
210        node.inode.set_child_count(node.chunks.len() as u32);
211        let child = Tree::new(node);
212        child
213            .borrow_mut_node()
214            .v5_set_dir_size(ctx.fs_version, &child.children);
215        Ok(child)
216    }
217
218    /// Insert chunks.
219    fn insert_chunks(
220        ctx: &mut BuildContext,
221        blob_mgr: &mut BlobManager,
222        node: &mut Node,
223        chunkdict_chunks: &[ChunkdictChunkInfo],
224        chunkdict_blobs: &[ChunkdictBlobInfo],
225    ) -> Result<()> {
226        for (index, chunk_info) in chunkdict_chunks.iter().enumerate() {
227            let chunk_size: u32 = chunk_info.chunk_compressed_size;
228            let file_offset = index as u64 * chunk_size as u64;
229            let mut chunk = ChunkWrapper::new(ctx.fs_version);
230
231            // Update blob context.
232            let (blob_index, blob_ctx) =
233                blob_mgr.get_or_cerate_blob_for_chunkdict(ctx, &chunk_info.chunk_blob_id)?;
234            let chunk_uncompressed_size = chunk_info.chunk_uncompressed_size;
235            let pre_d_offset = blob_ctx.current_uncompressed_offset;
236            blob_ctx.uncompressed_blob_size = pre_d_offset + chunk_uncompressed_size as u64;
237            blob_ctx.current_uncompressed_offset += chunk_uncompressed_size as u64;
238
239            blob_ctx.blob_meta_header.set_ci_uncompressed_size(
240                blob_ctx.blob_meta_header.ci_uncompressed_size()
241                    + size_of::<BlobChunkInfoV1Ondisk>() as u64,
242            );
243            blob_ctx.blob_meta_header.set_ci_compressed_size(
244                blob_ctx.blob_meta_header.ci_uncompressed_size()
245                    + size_of::<BlobChunkInfoV1Ondisk>() as u64,
246            );
247            let chunkdict_blob_info = chunkdict_blobs
248                .iter()
249                .find(|blob| blob.blob_id == chunk_info.chunk_blob_id)
250                .unwrap();
251            blob_ctx.blob_compressor =
252                Algorithm::from_str(chunkdict_blob_info.blob_compressor.as_str())?;
253            blob_ctx
254                .blob_meta_header
255                .set_ci_uncompressed_size(chunkdict_blob_info.blob_meta_ci_uncompressed_size);
256            blob_ctx
257                .blob_meta_header
258                .set_ci_compressed_size(chunkdict_blob_info.blob_meta_ci_compressed_size);
259            blob_ctx
260                .blob_meta_header
261                .set_ci_compressed_offset(chunkdict_blob_info.blob_meta_ci_offset);
262            blob_ctx.blob_meta_header.set_ci_compressor(Algorithm::Zstd);
263
264            // Update chunk context.
265            let chunk_index = blob_ctx.alloc_chunk_index()?;
266            chunk.set_blob_index(blob_index);
267            chunk.set_index(chunk_index);
268            chunk.set_file_offset(file_offset);
269            chunk.set_compressed_size(chunk_info.chunk_compressed_size);
270            chunk.set_compressed_offset(chunk_info.chunk_compressed_offset);
271            chunk.set_uncompressed_size(chunk_info.chunk_uncompressed_size);
272            chunk.set_uncompressed_offset(chunk_info.chunk_uncompressed_offset);
273            chunk.set_id(RafsDigest::from_string(&chunk_info.chunk_digest));
274            chunk.set_crc32(chunk_info.chunk_crc32);
275
276            node.chunks.push(NodeChunk {
277                source: ChunkSource::Build,
278                inner: Arc::new(chunk.clone()),
279            });
280        }
281        Ok(())
282    }
283}