// nydus_builder/lib.rs

1// Copyright 2020 Ant Group. All rights reserved.
2//
3// SPDX-License-Identifier: Apache-2.0
4
5//! Builder to create RAFS filesystems from directories and tarballs.
6
7#[macro_use]
8extern crate log;
9
10use crate::core::context::Artifact;
11use std::ffi::OsString;
12use std::os::unix::ffi::OsStrExt;
13use std::path::{Path, PathBuf};
14
15use anyhow::{anyhow, Context, Result};
16use nydus_rafs::metadata::inode::InodeWrapper;
17use nydus_rafs::metadata::layout::RafsXAttrs;
18use nydus_rafs::metadata::{Inode, RafsVersion};
19use nydus_storage::meta::toc;
20use nydus_utils::digest::{DigestHasher, RafsDigest};
21use nydus_utils::{compress, digest, root_tracer, timing_tracer};
22use sha2::Digest;
23
24use self::core::node::{Node, NodeInfo};
25
26pub use self::chunkdict_generator::ChunkdictBlobInfo;
27pub use self::chunkdict_generator::ChunkdictChunkInfo;
28pub use self::chunkdict_generator::Generator;
29pub use self::compact::BlobCompactor;
30pub use self::compact::Config as CompactConfig;
31pub use self::core::bootstrap::Bootstrap;
32pub use self::core::chunk_dict::{parse_chunk_dict_arg, ChunkDict, HashChunkDict};
33pub use self::core::context::{
34    ArtifactStorage, ArtifactWriter, BlobCacheGenerator, BlobContext, BlobManager,
35    BootstrapContext, BootstrapManager, BuildContext, BuildOutput, ConversionType,
36};
37pub use self::core::feature::{Feature, Features};
38pub use self::core::node::{ChunkSource, NodeChunk};
39pub use self::core::overlay::{Overlay, WhiteoutSpec};
40pub use self::core::prefetch::{Prefetch, PrefetchPolicy};
41pub use self::core::tree::{MetadataTreeBuilder, Tree, TreeNode};
42pub use self::directory::DirectoryBuilder;
43pub use self::merge::Merger;
44pub use self::optimize_prefetch::generate_prefetch_file_info;
45pub use self::optimize_prefetch::update_ctx_from_bootstrap;
46pub use self::optimize_prefetch::OptimizePrefetch;
47pub use self::stargz::StargzBuilder;
48pub use self::tarball::TarballBuilder;
49
50pub mod attributes;
51mod chunkdict_generator;
52mod compact;
53mod core;
54mod directory;
55mod merge;
56mod optimize_prefetch;
57mod stargz;
58mod tarball;
59
60/// Trait to generate a RAFS filesystem from the source.
pub trait Builder {
    /// Build a RAFS filesystem from the builder's source.
    ///
    /// On success returns a [BuildOutput] describing the generated bootstrap
    /// and data blob artifacts.
    ///
    /// # Errors
    /// Returns an error if the source can not be converted into a RAFS
    /// filesystem with the given contexts/managers.
    fn build(
        &mut self,
        build_ctx: &mut BuildContext,
        bootstrap_mgr: &mut BootstrapManager,
        blob_mgr: &mut BlobManager,
    ) -> Result<BuildOutput>;
}
69
70fn build_bootstrap(
71    ctx: &mut BuildContext,
72    bootstrap_mgr: &mut BootstrapManager,
73    bootstrap_ctx: &mut BootstrapContext,
74    blob_mgr: &mut BlobManager,
75    mut tree: Tree,
76) -> Result<Bootstrap> {
77    // For multi-layer build, merge the upper layer and lower layer with overlay whiteout applied.
78    if bootstrap_ctx.layered {
79        let mut parent = Bootstrap::load_parent_bootstrap(ctx, bootstrap_mgr, blob_mgr)?;
80        timing_tracer!({ parent.merge_overaly(ctx, tree) }, "merge_bootstrap")?;
81        tree = parent;
82    }
83
84    let mut bootstrap = Bootstrap::new(tree)?;
85    timing_tracer!({ bootstrap.build(ctx, bootstrap_ctx) }, "build_bootstrap")?;
86
87    Ok(bootstrap)
88}
89
/// Dump the bootstrap (RAFS metadata) to its storage target, and additionally
/// inline it into the data blob when `ctx.blob_inline_meta` is enabled.
fn dump_bootstrap(
    ctx: &mut BuildContext,
    bootstrap_mgr: &mut BootstrapManager,
    bootstrap_ctx: &mut BootstrapContext,
    bootstrap: &mut Bootstrap,
    blob_mgr: &mut BlobManager,
    blob_writer: &mut dyn Artifact,
) -> Result<()> {
    // Make sure blob id is updated according to blob hash if not specified by user.
    if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() {
        if blob_ctx.blob_id.is_empty() {
            // `Blob::dump()` should have set `blob_ctx.blob_id` to referenced OCI tarball for
            // ref-type conversion.
            assert!(!ctx.conversion_type.is_to_ref());
            if ctx.blob_inline_meta {
                // Set special blob id for blob with inlined meta.
                // The 64-char placeholder is replaced with the real id in `finalize_blob()`.
                blob_ctx.blob_id = "x".repeat(64);
            } else {
                blob_ctx.blob_id = format!("{:x}", blob_ctx.blob_hash.clone().finalize());
            }
        }
        if !ctx.conversion_type.is_to_ref() {
            blob_ctx.compressed_blob_size = blob_writer.pos()?;
        }
    }

    // Dump bootstrap file
    let blob_table = blob_mgr.to_blob_table(ctx)?;
    let storage = &mut bootstrap_mgr.bootstrap_storage;
    bootstrap.dump(ctx, storage, bootstrap_ctx, &blob_table)?;

    // Dump RAFS meta to data blob if inline meta is enabled.
    if ctx.blob_inline_meta {
        assert_ne!(ctx.conversion_type, ConversionType::TarToTarfs);
        // Ensure the blob object is created in case of no chunks generated for the blob.
        let blob_ctx = if blob_mgr.external {
            &mut blob_mgr.new_blob_ctx(ctx)?
        } else {
            let (_, blob_ctx) = blob_mgr
                .get_or_create_current_blob(ctx)
                .map_err(|_e| anyhow!("failed to get current blob object"))?;
            blob_ctx
        };
        let bootstrap_offset = blob_writer.pos()?;
        let uncompressed_bootstrap = bootstrap_ctx.writer.as_bytes()?;
        let uncompressed_size = uncompressed_bootstrap.len();
        // Digest is always taken over the uncompressed bytes so it is
        // independent of the compressor choice below.
        let uncompressed_digest =
            RafsDigest::from_buf(&uncompressed_bootstrap, digest::Algorithm::Sha256);

        // Output uncompressed data for backward compatibility and compressed data for new format.
        let (bootstrap_data, compressor) = if ctx.features.is_enabled(Feature::BlobToc) {
            let mut compressor = compress::Algorithm::Zstd;
            let (compressed_data, compressed) =
                compress::compress(&uncompressed_bootstrap, compressor)
                    .with_context(|| "failed to compress bootstrap".to_string())?;
            blob_ctx.write_data(blob_writer, &compressed_data)?;
            // Record `None` when the data was not actually compressed, so
            // readers do not attempt to decompress it.
            if !compressed {
                compressor = compress::Algorithm::None;
            }
            (compressed_data, compressor)
        } else {
            blob_ctx.write_data(blob_writer, &uncompressed_bootstrap)?;
            (uncompressed_bootstrap, compress::Algorithm::None)
        };

        let compressed_size = bootstrap_data.len();
        // Append a tar header entry describing the bootstrap payload.
        blob_ctx.write_tar_header(
            blob_writer,
            toc::TOC_ENTRY_BOOTSTRAP,
            compressed_size as u64,
        )?;

        if ctx.features.is_enabled(Feature::BlobToc) {
            blob_ctx.entry_list.add(
                toc::TOC_ENTRY_BOOTSTRAP,
                compressor,
                uncompressed_digest,
                bootstrap_offset,
                compressed_size as u64,
                uncompressed_size as u64,
            )?;
        }
    }

    Ok(())
}
176
177fn dump_toc(
178    ctx: &mut BuildContext,
179    blob_ctx: &mut BlobContext,
180    blob_writer: &mut dyn Artifact,
181) -> Result<()> {
182    if ctx.features.is_enabled(Feature::BlobToc) {
183        assert_ne!(ctx.conversion_type, ConversionType::TarToTarfs);
184        let mut hasher = RafsDigest::hasher(digest::Algorithm::Sha256);
185        let data = blob_ctx.entry_list.as_bytes().to_vec();
186        let toc_size = data.len() as u64;
187        blob_ctx.write_data(blob_writer, &data)?;
188        hasher.digest_update(&data);
189        let header = blob_ctx.write_tar_header(blob_writer, toc::TOC_ENTRY_BLOB_TOC, toc_size)?;
190        hasher.digest_update(header.as_bytes());
191        blob_ctx.blob_toc_digest = hasher.digest_finalize().data;
192        blob_ctx.blob_toc_size = toc_size as u32 + header.as_bytes().len() as u32;
193    }
194    Ok(())
195}
196
197fn finalize_blob(
198    ctx: &mut BuildContext,
199    blob_mgr: &mut BlobManager,
200    blob_writer: &mut dyn Artifact,
201) -> Result<()> {
202    if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() {
203        let is_tarfs = ctx.conversion_type == ConversionType::TarToTarfs;
204
205        if !is_tarfs {
206            dump_toc(ctx, blob_ctx, blob_writer)?;
207        }
208        if !ctx.conversion_type.is_to_ref() {
209            blob_ctx.compressed_blob_size = blob_writer.pos()?;
210        }
211        if ctx.blob_inline_meta && blob_ctx.blob_id == "x".repeat(64) {
212            blob_ctx.blob_id = String::new();
213        }
214
215        let hash = blob_ctx.blob_hash.clone().finalize();
216        let blob_meta_id = if ctx.blob_id.is_empty() {
217            format!("{:x}", hash)
218        } else {
219            assert!(!ctx.conversion_type.is_to_ref() || is_tarfs);
220            ctx.blob_id.clone()
221        };
222
223        if ctx.conversion_type.is_to_ref() {
224            if blob_ctx.blob_id.is_empty() {
225                // Use `sha256(tarball)` as `blob_id`. A tarball without files will fall through
226                // this path because `Blob::dump()` hasn't generated `blob_ctx.blob_id`.
227                if let Some(zran) = &ctx.blob_zran_generator {
228                    let reader = zran.lock().unwrap().reader();
229                    blob_ctx.compressed_blob_size = reader.get_data_size();
230                    if blob_ctx.blob_id.is_empty() {
231                        let hash = reader.get_data_digest();
232                        blob_ctx.blob_id = format!("{:x}", hash.finalize());
233                    }
234                } else if let Some(tar_reader) = &ctx.blob_tar_reader {
235                    blob_ctx.compressed_blob_size = tar_reader.position();
236                    if blob_ctx.blob_id.is_empty() {
237                        let hash = tar_reader.get_hash_object();
238                        blob_ctx.blob_id = format!("{:x}", hash.finalize());
239                    }
240                }
241            }
242            // Tarfs mode only has tar stream and meta blob, there's no data blob.
243            if !ctx.blob_inline_meta && !is_tarfs {
244                blob_ctx.blob_meta_digest = hash.into();
245                blob_ctx.blob_meta_size = blob_writer.pos()?;
246            }
247        } else if blob_ctx.blob_id.is_empty() {
248            // `blob_ctx.blob_id` should be RAFS blob id.
249            blob_ctx.blob_id = blob_meta_id.clone();
250        }
251
252        // Tarfs mode directly use the tar file as RAFS data blob, so no need to generate the data
253        // blob file.
254        if !is_tarfs {
255            blob_writer.finalize(Some(blob_meta_id))?;
256        }
257
258        if let Some(blob_cache) = ctx.blob_cache_generator.as_ref() {
259            blob_cache.finalize(&blob_ctx.blob_id)?;
260        }
261    }
262    Ok(())
263}
264
265/// Helper for TarballBuilder/StargzBuilder to build the filesystem tree.
pub struct TarBuilder {
    /// Propagated into each generated node's `NodeInfo::explicit_uidgid`.
    pub explicit_uidgid: bool,
    /// Index of the layer being built; recorded on every generated node.
    pub layer_idx: u16,
    /// RAFS filesystem version to generate inodes for.
    pub version: RafsVersion,
    // Most recently allocated inode number; see `next_ino()`.
    next_ino: Inode,
}
272
impl TarBuilder {
    /// Create a new instance of [TarBuilder].
    pub fn new(explicit_uidgid: bool, layer_idx: u16, version: RafsVersion) -> Self {
        TarBuilder {
            explicit_uidgid,
            layer_idx,
            next_ino: 0,
            version,
        }
    }

    /// Allocate an inode number.
    ///
    /// Numbers are issued sequentially starting from 1; the `next_ino` field
    /// holds the most recently issued number.
    pub fn next_ino(&mut self) -> Inode {
        self.next_ino += 1;
        self.next_ino
    }

    /// Insert a node into the tree, creating any missing intermediate directories.
    ///
    /// `node.target_vec()` supplies the path components from the root ("/")
    /// down to the node itself; the tree is walked component by component,
    /// reusing existing children and synthesizing directory nodes for missing
    /// intermediate components.
    pub fn insert_into_tree(&mut self, tree: &mut Tree, node: Node) -> Result<()> {
        let target_paths = node.target_vec();
        let target_paths_len = target_paths.len();

        if target_paths_len == 1 {
            // Handle root node modification
            assert_eq!(node.path(), Path::new("/"));
            tree.set_node(node);
        } else {
            let mut tmp_tree = tree;
            // Component 0 is the root itself, so start walking at index 1.
            for idx in 1..target_paths.len() {
                match tmp_tree.get_child_idx(target_paths[idx].as_bytes()) {
                    Some(i) => {
                        if idx == target_paths_len - 1 {
                            // Last component already exists: replace its node in place.
                            tmp_tree.children[i].set_node(node);
                            break;
                        } else {
                            tmp_tree = &mut tmp_tree.children[i];
                        }
                    }
                    None => {
                        if idx == target_paths_len - 1 {
                            // Last component is missing: insert the new leaf.
                            tmp_tree.insert_child(Tree::new(node));
                            break;
                        } else {
                            // Missing intermediate directory: synthesize one
                            // and descend into it.
                            let node = self.create_directory(&target_paths[..=idx])?;
                            tmp_tree.insert_child(Tree::new(node));
                            let last_idx = tmp_tree.children.len() - 1;
                            tmp_tree = &mut tmp_tree.children[last_idx];
                        }
                    }
                }
            }
        }

        Ok(())
    }

    /// Create a new node for a directory.
    ///
    /// `target_paths` holds the full component path of the directory; its last
    /// element is the directory's own name.
    pub fn create_directory(&mut self, target_paths: &[OsString]) -> Result<Node> {
        let ino = self.next_ino();
        let name = &target_paths[target_paths.len() - 1];
        let mut inode = InodeWrapper::new(self.version);
        inode.set_ino(ino);
        // Mode 0755 directory; nlink 2 for the "." and parent entries.
        inode.set_mode(0o755 | libc::S_IFDIR as u32);
        inode.set_nlink(2);
        inode.set_name_size(name.len());
        inode.set_rdev(u32::MAX);

        let source = PathBuf::from("/");
        let target_vec = target_paths.to_vec();
        // Rebuild the target path by joining all components.
        let mut target = PathBuf::new();
        for name in target_paths.iter() {
            target = target.join(name);
        }
        let info = NodeInfo {
            explicit_uidgid: self.explicit_uidgid,
            src_ino: ino,
            // Synthesized directory has no real source device/rdev.
            src_dev: u64::MAX,
            rdev: u64::MAX,
            path: target.clone(),
            source,
            target,
            target_vec,
            symlink: None,
            xattrs: RafsXAttrs::new(),
            v6_force_extended_inode: false,
        };

        Ok(Node::new(inode, info, self.layer_idx))
    }

    /// Check whether the path is a eStargz special file.
    pub fn is_stargz_special_files(&self, path: &Path) -> bool {
        path == Path::new("/stargz.index.json")
            || path == Path::new("/.prefetch.landmark")
            || path == Path::new("/.no.prefetch.landmark")
    }
}
370
#[cfg(test)]
mod tests {
    use vmm_sys_util::tempdir::TempDir;

    use super::*;

    #[test]
    fn test_tar_builder_is_stargz_special_files() {
        let builder = TarBuilder::new(true, 0, RafsVersion::V6);

        // The three eStargz special files must be recognized.
        for special in [
            "/stargz.index.json",
            "/.prefetch.landmark",
            "/.no.prefetch.landmark",
        ] {
            assert!(builder.is_stargz_special_files(Path::new(special)));
        }

        // Similar-looking but ordinary paths must be rejected.
        for ordinary in [
            "/no.prefetch.landmark",
            "/prefetch.landmark",
            "/tar.index.json",
        ] {
            assert!(!builder.is_stargz_special_files(Path::new(ordinary)));
        }
    }

    #[test]
    fn test_tar_builder_create_directory() {
        let tmp_dir = TempDir::new().unwrap();
        let target_paths = [OsString::from(tmp_dir.as_path())];
        let mut builder = TarBuilder::new(true, 0, RafsVersion::V6);

        let node = builder
            .create_directory(&target_paths)
            .expect("create_directory should succeed");
        println!("Node: {}", node);
        assert_eq!(node.file_type(), "dir");
        assert_eq!(node.target(), tmp_dir.as_path());

        // `create_directory` consumed exactly one inode number.
        assert_eq!(builder.next_ino, 1);
        assert_eq!(builder.next_ino(), 2);
    }
}