nydus_builder/core/
v6.rs

1// Copyright 2020 Ant Group. All rights reserved.
2// Copyright (C) 2021 Alibaba Cloud. All rights reserved.
3//
4// SPDX-License-Identifier: Apache-2.0
5
6use std::collections::BTreeMap;
7use std::ffi::{OsStr, OsString};
8use std::io::SeekFrom;
9use std::mem::size_of;
10use std::os::unix::ffi::OsStrExt;
11use std::sync::Arc;
12
13use anyhow::{bail, ensure, Context, Result};
14use nydus_rafs::metadata::chunk::ChunkWrapper;
15use nydus_rafs::metadata::layout::v6::{
16    align_offset, calculate_nid, new_v6_inode, RafsV6BlobTable, RafsV6Device, RafsV6Dirent,
17    RafsV6InodeChunkAddr, RafsV6InodeChunkHeader, RafsV6OndiskInode, RafsV6SuperBlock,
18    RafsV6SuperBlockExt, EROFS_BLOCK_BITS_9, EROFS_BLOCK_SIZE_4096, EROFS_BLOCK_SIZE_512,
19    EROFS_DEVTABLE_OFFSET, EROFS_INODE_CHUNK_BASED, EROFS_INODE_FLAT_INLINE,
20    EROFS_INODE_FLAT_PLAIN, EROFS_INODE_SLOT_SIZE, EROFS_SUPER_BLOCK_SIZE, EROFS_SUPER_OFFSET,
21};
22use nydus_rafs::metadata::RafsStore;
23use nydus_rafs::RafsIoWrite;
24use nydus_storage::device::{BlobFeatures, BlobInfo};
25use nydus_utils::{root_tracer, round_down, round_up, timing_tracer};
26
27use super::chunk_dict::DigestWithBlobIndex;
28use super::node::Node;
29use crate::{Bootstrap, BootstrapContext, BuildContext, ConversionType, Tree};
30
// 4 KiB buffer of zeroes used to pad the bootstrap/meta blob up to alignment
// boundaries when writing.
const WRITE_PADDING_DATA: [u8; 4096] = [0u8; 4096];
// 0x8_0000 = 512 KiB. Alignment granularity for v6 block segments;
// NOTE(review): its consumer is outside this view — confirm usage there.
const V6_BLOCK_SEG_ALIGNMENT: u64 = 0x8_0000;
33
34// Rafs v6 dedicated methods
35impl Node {
36    /// Dump RAFS v6 inode metadata to meta blob.
37    pub fn dump_bootstrap_v6(
38        &mut self,
39        ctx: &mut BuildContext,
40        f_bootstrap: &mut dyn RafsIoWrite,
41        orig_meta_addr: u64,
42        meta_addr: u64,
43        chunk_cache: &mut BTreeMap<DigestWithBlobIndex, Arc<ChunkWrapper>>,
44        blobs: &[Arc<BlobInfo>],
45    ) -> Result<()> {
46        let xattr_inline_count = self.info.xattrs.count_v6();
47        ensure!(
48            xattr_inline_count <= u16::MAX as usize,
49            "size of extended attributes is too big"
50        );
51        let mut inode = new_v6_inode(
52            &self.inode,
53            self.v6_datalayout,
54            xattr_inline_count as u16,
55            self.v6_compact_inode,
56        );
57
58        let meta_offset = meta_addr - orig_meta_addr;
59        // update all the inodes's offset according to the new 'meta_addr'.
60        self.v6_offset += meta_offset;
61        // The EROFS_INODE_FLAT_INLINE layout is valid for directory and symlink only,
62        // so `dirents_offset` is useful for these two types too, otherwise `dirents_offset`
63        // should always be zero.
64        // Enforce the check to avoid overflow of `dirents_offset`.
65        if self.is_dir() || self.is_symlink() {
66            self.v6_dirents_offset += meta_offset;
67        }
68        let nid = calculate_nid(self.v6_offset, meta_addr);
69        self.inode.set_ino(nid);
70
71        if self.is_dir() {
72            self.v6_dump_dir(ctx, f_bootstrap, meta_addr, meta_offset, &mut inode)?;
73        } else if self.is_reg() {
74            self.v6_dump_file(ctx, f_bootstrap, chunk_cache, &mut inode, &blobs)?;
75        } else if self.is_symlink() {
76            self.v6_dump_symlink(ctx, f_bootstrap, &mut inode)?;
77        } else {
78            f_bootstrap
79                .seek(SeekFrom::Start(self.v6_offset))
80                .context("failed seek for dir inode")?;
81            inode.store(f_bootstrap).context("failed to store inode")?;
82            self.v6_store_xattrs(ctx, f_bootstrap)?;
83        }
84
85        Ok(())
86    }
87
88    /// Update whether compact mode can be used for this inode or not.
89    pub fn v6_set_inode_compact(&mut self) {
90        self.v6_compact_inode = !(self.info.v6_force_extended_inode
91            || self.inode.uid() > u16::MAX as u32
92            || self.inode.gid() > u16::MAX as u32
93            || self.inode.nlink() > u16::MAX as u32
94            || self.inode.size() > u32::MAX as u64
95            || self.path().extension() == Some(OsStr::new("pyc")));
96    }
97
    /// Layout the normal inode (except directory inode) into the meta blob.
    ///
    /// On success `self.v6_offset` points at the reserved inode slot and
    /// `bootstrap_ctx.offset` has been advanced past any newly allocated
    /// space.
    ///
    /// * `v6_hardlink_offset` - offset of an already laid-out hardlink
    ///   target; when present this inode shares that slot instead of
    ///   allocating a new one.
    /// * `block_size` - EROFS block size in bytes (512 or 4096).
    pub fn v6_set_offset(
        &mut self,
        bootstrap_ctx: &mut BootstrapContext,
        v6_hardlink_offset: Option<u64>,
        block_size: u64,
    ) -> Result<()> {
        ensure!(!self.is_dir(), "{} is a directory", self.path().display());
        if self.is_reg() {
            if let Some(v6_hardlink_offset) = v6_hardlink_offset {
                self.v6_offset = v6_hardlink_offset;
            } else {
                let size = self.v6_size_with_xattr();
                let unit = size_of::<RafsV6InodeChunkAddr>() as u64;
                // The inode is followed by a unit-aligned array of chunk
                // addresses, one entry per chunk.
                let total_size = round_up(size, unit) + self.inode.child_count() as u64 * unit;
                // First try to allocate from fragments of dirent pages.
                self.v6_offset = bootstrap_ctx.allocate_available_block(total_size, block_size);
                if self.v6_offset == 0 {
                    // No reusable fragment; append at the current end.
                    self.v6_offset = bootstrap_ctx.offset;
                    bootstrap_ctx.offset += total_size;
                }
            }
            self.v6_datalayout = EROFS_INODE_CHUNK_BASED;
        } else if self.is_symlink() {
            // Symlink target may be inlined right after the inode; layout is
            // decided by `v6_set_offset_with_tail`.
            self.v6_set_offset_with_tail(bootstrap_ctx, self.inode.size(), block_size);
        } else {
            // Special files only need the bare inode (plus inline xattrs).
            self.v6_offset = bootstrap_ctx.offset;
            bootstrap_ctx.offset += self.v6_size_with_xattr();
        }
        bootstrap_ctx.align_offset(EROFS_INODE_SLOT_SIZE as u64);

        Ok(())
    }
131
132    /// Layout the directory inode and its dirents into meta blob.
133    pub fn v6_set_dir_offset(
134        &mut self,
135        bootstrap_ctx: &mut BootstrapContext,
136        d_size: u64,
137        block_size: u64,
138    ) -> Result<()> {
139        ensure!(
140            self.is_dir(),
141            "{} is not a directory",
142            self.path().display()
143        );
144
145        // Dir isize is the total bytes of 'dirents + names'.
146        self.inode.set_size(d_size);
147        self.v6_set_offset_with_tail(bootstrap_ctx, d_size, block_size);
148        bootstrap_ctx.align_offset(EROFS_INODE_SLOT_SIZE as u64);
149
150        Ok(())
151    }
152
153    /// Calculate space needed to store dirents of the directory inode.
154    pub fn v6_dirent_size(&self, ctx: &mut BuildContext, tree: &Tree) -> Result<u64> {
155        ensure!(self.is_dir(), "{} is not a directory", self);
156        let block_size = ctx.v6_block_size();
157        let mut d_size = 0;
158
159        // Sort all children if "." and ".." are not at the head after sorting.
160        if !tree.children.is_empty() && tree.children[0].name() < "..".as_bytes() {
161            let mut children = Vec::with_capacity(tree.children.len() + 2);
162            children.push(".".as_bytes());
163            children.push("..".as_bytes());
164            for child in tree.children.iter() {
165                children.push(child.name());
166            }
167            children.sort_unstable();
168
169            for c in children {
170                // Use length in byte, instead of length in character.
171                let len = c.len() + size_of::<RafsV6Dirent>();
172                // erofs disk format requires dirent to be aligned to block size.
173                if (d_size % block_size) + len as u64 > block_size {
174                    d_size = round_up(d_size as u64, block_size);
175                }
176                d_size += len as u64;
177            }
178        } else {
179            // Avoid sorting again if "." and ".." are at the head after sorting due to that
180            // `tree.children` has already been sorted.
181            d_size =
182                (".".len() + size_of::<RafsV6Dirent>() + "..".len() + size_of::<RafsV6Dirent>())
183                    as u64;
184            for child in tree.children.iter() {
185                let len = child.name().len() + size_of::<RafsV6Dirent>();
186                // erofs disk format requires dirent to be aligned to block size.
187                if (d_size % block_size) + len as u64 > block_size {
188                    d_size = round_up(d_size as u64, block_size);
189                }
190                d_size += len as u64;
191            }
192        }
193
194        Ok(d_size)
195    }
196
197    fn v6_size_with_xattr(&self) -> u64 {
198        self.inode
199            .get_inode_size_with_xattr(&self.info.xattrs, self.v6_compact_inode) as u64
200    }
201
    // Layout symlink or directory inodes into the meta blob.
    //
    // For DIR inode, size is the total bytes of 'dirents + names'.
    // For symlink, size is the length of symlink name.
    //
    // On return: `self.v6_offset` is the inode slot, `self.v6_dirents_offset`
    // is where the block-aligned part of the data goes, and
    // `self.v6_datalayout` records whether the tail got inlined after the
    // inode (FLAT_INLINE) or stored in its own blocks (FLAT_PLAIN).
    fn v6_set_offset_with_tail(
        &mut self,
        bootstrap_ctx: &mut BootstrapContext,
        d_size: u64,
        block_size: u64,
    ) {
        //          |    avail       |
        // +--------+-----------+----+ +-----------------------+
        // |        |inode+tail | free |   dirents+names       |
        // |        |           |    | |                       |
        // +--------+-----------+----+ +-----------------------+
        //
        //          |    avail       |
        // +--------+-----------+----+ +-----------------------+ +---------+-------------+
        // |        |inode      | free |   dirents+names       | | tail    | free        |
        // |        |           |    | |                       | |         |             |
        // +--------+-----------+----+ +-----------------------+ +---------+-------------+
        //
        //
        //          |    avail       |
        // +--------+-----------+----+ +-----------------------+ +---------+-------------+
        // |        |     inode      + |   dirents+names       | | tail    | free        |
        // |        |                | |                       | |         |             |
        // +--------+-----------+----+ +-----------------------+ +---------+-------------+
        //
        //
        //          |    avail       |
        // +--------+----------------+ +--------------+--------+ +-----------------------+
        // |        |     inode      | |  inode+tail  | free   | | dirents+names         |
        // |        |                | |              |        | |                       |
        // +--------+----------------+ +--------------+--------+ +-----------------------+
        //          |         inode                   |
        //
        //          |    avail       |
        // +--------+----------------+ +--------------+--------+ +-----------------------+ +-------+---------------+
        // |        |     inode      | |  inode       | free   | | dirents+names         | | tail  |    free       |
        // |        |                | |              |        | |                       | |       |               |
        // +--------+----------------+ +--------------+--------+ +-----------------------+ +-------+---------------+
        //          |         inode                   |
        //
        //
        let inode_size = self.v6_size_with_xattr();
        // Size of the trailing partial block of data; 0 means block-aligned.
        let tail: u64 = d_size % block_size;

        // We use a simple inline strategy here:
        // If the inode size with xattr + tail data size <= EROFS_BLOCK_SIZE,
        // we choose to inline it.
        // Firstly, if it's bigger than EROFS_BLOCK_SIZE,
        // in most cases, we can assume that the tail data size is close to EROFS_BLOCK_SIZE,
        // in this condition, even if we don't inline the tail data, there won't be much waste.
        // Secondly, the `available_blocks` that we maintain in the `BootstrapCtx`,
        // since it contain only single blocks with some unused space, the available space can only
        // be smaller than EROFS_BLOCK_SIZE, therefore we can't use our used blocks to store the
        // inode plus the tail data bigger than EROFS_BLOCK_SIZE.
        let should_inline = tail != 0 && (inode_size + tail) <= block_size;

        // If should inline, we first try to allocate space for the inode together with tail data
        // using used blocks.
        // If no available used block exists, we try to allocate space from current block.
        // If current block doesn't have enough space, we append it to `available_blocks`,
        // and we allocate space from the next block.
        // For the remaining data, we allocate space for it sequentially.
        self.v6_datalayout = if should_inline {
            self.v6_offset = bootstrap_ctx.allocate_available_block(inode_size + tail, block_size);
            if self.v6_offset == 0 {
                // Free space remaining in the block currently being filled.
                let available = block_size - bootstrap_ctx.offset % block_size;
                if available < inode_size + tail {
                    // Record the leftover fragment for later reuse, then start
                    // a fresh block.
                    bootstrap_ctx.append_available_block(bootstrap_ctx.offset, block_size);
                    bootstrap_ctx.align_offset(block_size);
                }

                self.v6_offset = bootstrap_ctx.offset;
                bootstrap_ctx.offset += inode_size + tail;
            }

            if d_size != tail {
                // There are full data blocks besides the inlined tail; they
                // must start on a block boundary.
                bootstrap_ctx.append_available_block(bootstrap_ctx.offset, block_size);
                bootstrap_ctx.align_offset(block_size);
            }
            self.v6_dirents_offset = bootstrap_ctx.offset;
            // Only the block-aligned part is stored here; the tail lives
            // inline after the inode.
            bootstrap_ctx.offset += round_down(d_size, block_size);

            EROFS_INODE_FLAT_INLINE
        } else {
            // Otherwise, we first try to allocate space for the inode from used blocks.
            // If no available used block exists, we allocate space sequentially.
            // Then we allocate space for all data.
            self.v6_offset = bootstrap_ctx.allocate_available_block(inode_size, block_size);
            if self.v6_offset == 0 {
                self.v6_offset = bootstrap_ctx.offset;
                bootstrap_ctx.offset += inode_size;
            }

            bootstrap_ctx.append_available_block(bootstrap_ctx.offset, block_size);
            bootstrap_ctx.align_offset(block_size);
            self.v6_dirents_offset = bootstrap_ctx.offset;
            bootstrap_ctx.offset += d_size;
            bootstrap_ctx.align_offset(block_size);

            EROFS_INODE_FLAT_PLAIN
        };

        trace!(
            "{:?} inode offset {} ctx offset {} d_size {} dirents_offset {} datalayout {}",
            self.name(),
            self.v6_offset,
            bootstrap_ctx.offset,
            d_size,
            self.v6_dirents_offset,
            self.v6_datalayout
        );
    }
318
319    fn v6_store_xattrs(
320        &mut self,
321        ctx: &mut BuildContext,
322        f_bootstrap: &mut dyn RafsIoWrite,
323    ) -> Result<()> {
324        if !self.info.xattrs.is_empty() {
325            self.info
326                .xattrs
327                .store_v6(f_bootstrap)
328                .context("failed to dump xattr to bootstrap")?;
329            ctx.has_xattr = true;
330        }
331        Ok(())
332    }
333
334    fn v6_dump_dir(
335        &mut self,
336        ctx: &mut BuildContext,
337        f_bootstrap: &mut dyn RafsIoWrite,
338        meta_addr: u64,
339        meta_offset: u64,
340        inode: &mut Box<dyn RafsV6OndiskInode>,
341    ) -> Result<()> {
342        // the 1st 4k block after dir inode.
343        let mut dirent_off = self.v6_dirents_offset;
344        let blk_addr = ctx
345            .v6_block_addr(dirent_off)
346            .with_context(|| format!("failed to compute blk_addr for offset 0x{:x}", dirent_off))?;
347        inode.set_u(blk_addr);
348        self.v6_dump_inode(ctx, f_bootstrap, inode)
349            .context("failed to dump inode for directory")?;
350
351        // Dump dirents
352        let mut dir_data: Vec<u8> = Vec::new();
353        let mut entry_names = Vec::new();
354        let mut dirents: Vec<(RafsV6Dirent, &OsString)> = Vec::new();
355        let mut nameoff: u64 = 0;
356        let mut used: u64 = 0;
357        let block_size = ctx.v6_block_size();
358
359        trace!(
360            "{:?} self.dirents.len {}",
361            self.target(),
362            self.v6_dirents.len()
363        );
364        // fill dir blocks one by one
365        for (offset, name, file_type) in self.v6_dirents.iter() {
366            let len = name.as_bytes().len() + size_of::<RafsV6Dirent>();
367            // write to bootstrap when it will exceed EROFS_BLOCK_SIZE
368            if used + len as u64 > block_size {
369                for (entry, name) in dirents.iter_mut() {
370                    trace!("{:?} nameoff {}", name, nameoff);
371                    entry.set_name_offset(nameoff as u16);
372                    dir_data.extend(entry.as_ref());
373                    entry_names.push(*name);
374                    // Use length in byte, instead of length in character.
375                    // Because some characters could occupy more than one byte.
376                    nameoff += name.as_bytes().len() as u64;
377                }
378                for name in entry_names.iter() {
379                    dir_data.extend(name.as_bytes());
380                }
381
382                f_bootstrap
383                    .seek(SeekFrom::Start(dirent_off as u64))
384                    .context("failed seek file position for writing dirent")?;
385                f_bootstrap
386                    .write(dir_data.as_slice())
387                    .context("failed to write dirent data to meta blob")?;
388
389                // track where we're going to write.
390                dirent_off += round_up(used, block_size);
391                used = 0;
392                nameoff = 0;
393                dir_data.clear();
394                entry_names.clear();
395                dirents.clear();
396            }
397
398            trace!(
399                "name {:?} file type {} {:?}",
400                *name,
401                *file_type,
402                RafsV6Dirent::file_type(*file_type)
403            );
404            let entry = RafsV6Dirent::new(
405                calculate_nid(*offset + meta_offset, meta_addr),
406                0,
407                RafsV6Dirent::file_type(*file_type),
408            );
409            dirents.push((entry, name));
410
411            nameoff += size_of::<RafsV6Dirent>() as u64;
412            used += len as u64;
413        }
414
415        trace!(
416            "{:?} used {} dir size {}",
417            self.target(),
418            used,
419            self.inode.size()
420        );
421        // dump tail part if any
422        if used > 0 {
423            for (entry, name) in dirents.iter_mut() {
424                trace!("{:?} tail nameoff {}", name, nameoff);
425                entry.set_name_offset(nameoff as u16);
426                dir_data.extend(entry.as_ref());
427                entry_names.push(*name);
428                nameoff += name.as_bytes().len() as u64;
429            }
430            for name in entry_names.iter() {
431                dir_data.extend(name.as_bytes());
432            }
433
434            let tail_off = match self.v6_datalayout {
435                EROFS_INODE_FLAT_INLINE => self.v6_offset + self.v6_size_with_xattr(),
436                EROFS_INODE_FLAT_PLAIN => dirent_off,
437                _ => bail!("unsupported RAFS v6 inode layout for directory"),
438            };
439            f_bootstrap
440                .seek(SeekFrom::Start(tail_off as u64))
441                .context("failed seek for dir inode")?;
442            f_bootstrap
443                .write(dir_data.as_slice())
444                .context("failed to store dirents")?;
445        }
446
447        Ok(())
448    }
449
450    fn v6_dump_file(
451        &mut self,
452        ctx: &mut BuildContext,
453        f_bootstrap: &mut dyn RafsIoWrite,
454        chunk_cache: &mut BTreeMap<DigestWithBlobIndex, Arc<ChunkWrapper>>,
455        inode: &mut Box<dyn RafsV6OndiskInode>,
456        blobs: &[Arc<BlobInfo>],
457    ) -> Result<()> {
458        let mut is_continuous = true;
459        let mut prev = None;
460
461        // write chunk indexes, chunk contents has been written to blob file.
462        let mut chunks: Vec<u8> = Vec::new();
463        for chunk in self.chunks.iter() {
464            let offset = chunk.inner.uncompressed_offset();
465            let blk_addr = ctx.v6_block_addr(offset).with_context(|| {
466                format!(
467                    "failed to compute blk_addr for chunk with uncompressed offset 0x{:x}",
468                    offset
469                )
470            })?;
471            let blob_idx = chunk.inner.blob_index();
472            let mut v6_chunk = RafsV6InodeChunkAddr::new();
473            v6_chunk.set_blob_index(blob_idx);
474            v6_chunk.set_blob_ci_index(chunk.inner.index());
475            v6_chunk.set_block_addr(blk_addr);
476
477            chunks.extend(v6_chunk.as_ref());
478            let external =
479                blobs[chunk.inner.blob_index() as usize].has_feature(BlobFeatures::EXTERNAL);
480            let chunk_index = if external {
481                Some(chunk.inner.index())
482            } else {
483                None
484            };
485            chunk_cache.insert(
486                DigestWithBlobIndex(*chunk.inner.id(), chunk.inner.blob_index() + 1, chunk_index),
487                chunk.inner.clone(),
488            );
489            if let Some((prev_idx, prev_pos)) = prev {
490                if prev_pos + ctx.chunk_size as u64 != offset || prev_idx != blob_idx {
491                    is_continuous = false;
492                }
493            }
494            prev = Some((blob_idx, offset));
495        }
496
497        // Special optimization to enable page cache sharing for EROFS.
498        let chunk_size = if is_continuous && inode.size() > ctx.chunk_size as u64 {
499            inode.size().next_power_of_two()
500        } else {
501            ctx.chunk_size as u64
502        };
503        let info = RafsV6InodeChunkHeader::new(chunk_size, ctx.v6_block_size());
504        inode.set_u(info.to_u32());
505        self.v6_dump_inode(ctx, f_bootstrap, inode)
506            .context("failed to dump inode for file")?;
507
508        let unit = size_of::<RafsV6InodeChunkAddr>() as u64;
509        let offset = align_offset(self.v6_offset + self.v6_size_with_xattr(), unit);
510        f_bootstrap
511            .seek(SeekFrom::Start(offset))
512            .with_context(|| format!("failed to seek to 0x{:x} for writing chunk data", offset))?;
513        f_bootstrap
514            .write(chunks.as_slice())
515            .context("failed to write chunk data for file")?;
516
517        Ok(())
518    }
519
520    fn v6_dump_symlink(
521        &mut self,
522        ctx: &mut BuildContext,
523        f_bootstrap: &mut dyn RafsIoWrite,
524        inode: &mut Box<dyn RafsV6OndiskInode>,
525    ) -> Result<()> {
526        let blk_addr = ctx.v6_block_addr(self.v6_dirents_offset)?;
527        inode.set_u(blk_addr);
528        self.v6_dump_inode(ctx, f_bootstrap, inode)
529            .context("failed to dump inode for symlink")?;
530
531        if let Some(symlink) = &self.info.symlink {
532            let tail_off = match self.v6_datalayout {
533                EROFS_INODE_FLAT_INLINE => self.v6_offset + self.v6_size_with_xattr(),
534                EROFS_INODE_FLAT_PLAIN => self.v6_dirents_offset,
535                _ => bail!("unsupported RAFS v5 inode layout for symlink"),
536            };
537            f_bootstrap
538                .seek(SeekFrom::Start(tail_off))
539                .context("failed seek for dir inode")?;
540            f_bootstrap
541                .write(symlink.as_bytes())
542                .context("filed to store symlink")?;
543        }
544
545        Ok(())
546    }
547
548    fn v6_dump_inode(
549        &mut self,
550        ctx: &mut BuildContext,
551        f_bootstrap: &mut dyn RafsIoWrite,
552        inode: &mut Box<dyn RafsV6OndiskInode>,
553    ) -> Result<()> {
554        f_bootstrap
555            .seek(SeekFrom::Start(self.v6_offset))
556            .context("failed to seek file position for writing inode")?;
557        inode
558            .store(f_bootstrap)
559            .context("failed to write inode to meta blob")?;
560        self.v6_store_xattrs(ctx, f_bootstrap)
561            .context("failed to write extended attributes for inode")
562    }
563}
564
565impl BuildContext {
566    pub fn v6_block_size(&self) -> u64 {
567        if self.conversion_type == ConversionType::TarToTarfs {
568            // Tar stream is 512-byte aligned.
569            EROFS_BLOCK_SIZE_512
570        } else {
571            EROFS_BLOCK_SIZE_4096
572        }
573    }
574
575    pub fn v6_block_addr(&self, offset: u64) -> Result<u32> {
576        let blk_addr = offset / self.v6_block_size();
577        if blk_addr > u32::MAX as u64 {
578            bail!("v6 block address 0x{:x} is too big", blk_addr)
579        } else {
580            Ok(blk_addr as u32)
581        }
582    }
583}
584
585impl Bootstrap {
586    pub(crate) fn v6_update_dirents(parent: &Tree, parent_offset: u64) {
587        let mut node = parent.borrow_mut_node();
588        let node_offset = node.v6_offset;
589        if !node.is_dir() {
590            return;
591        }
592
593        // dot & dotdot
594        // Type of libc::S_IFDIR is u16 on macos, so it need a conversion
595        // but compiler will report useless conversion on linux platform,
596        // so we add an allow annotation here.
597        #[allow(clippy::useless_conversion)]
598        {
599            node.v6_dirents
600                .push((node_offset, OsString::from("."), libc::S_IFDIR.into()));
601            node.v6_dirents
602                .push((parent_offset, OsString::from(".."), libc::S_IFDIR.into()));
603        }
604
605        let mut dirs: Vec<&Tree> = Vec::new();
606        for child in parent.children.iter() {
607            let child_node = child.borrow_mut_node();
608            let entry = (
609                child_node.v6_offset,
610                OsStr::from_bytes(child.name()).to_owned(),
611                child_node.inode.mode(),
612            );
613            node.v6_dirents.push(entry);
614            if child_node.is_dir() {
615                dirs.push(child);
616            }
617        }
618        node.v6_dirents
619            .sort_unstable_by(|a, b| a.1.as_os_str().cmp(b.1.as_os_str()));
620
621        for dir in dirs {
622            Self::v6_update_dirents(dir, node_offset);
623        }
624    }
625
626    /// Dump bootstrap and blob file, return (Vec<blob_id>, blob_size)
627    pub(crate) fn v6_dump(
628        &mut self,
629        ctx: &mut BuildContext,
630        bootstrap_ctx: &mut BootstrapContext,
631        blob_table: &RafsV6BlobTable,
632    ) -> Result<()> {
633        // Rafs v6 disk layout
634        //
635        //  EROFS_SUPER_OFFSET
636        //     |
637        // +---+---------+------------+-------------+----------------------------------------------+
638        // |   |         |            |             |                 |         |                  |
639        // |1k |super    |extended    | blob table  |  prefetch table | inodes  | chunk info table |
640        // |   |block    |superblock+ |             |                 |         |                  |
641        // |   |         |devslot     |             |                 |         |                  |
642        // +---+---------+------------+-------------+----------------------------------------------+
643
644        let block_size = ctx.v6_block_size();
645        let blobs = blob_table.get_all();
646        let devtable_len = blobs.len() * size_of::<RafsV6Device>();
647        let blob_table_size = blob_table.size() as u64;
648        let blob_table_offset = align_offset(
649            (EROFS_DEVTABLE_OFFSET as u64) + devtable_len as u64,
650            EROFS_BLOCK_SIZE_4096,
651        );
652        let blob_table_entries = blobs.len();
653        assert!(blob_table_entries < u8::MAX as usize);
654        trace!(
655            "devtable len {} blob table offset {} blob table size {}",
656            devtable_len,
657            blob_table_offset,
658            blob_table_size
659        );
660
661        let fs_prefetch_rule_count = ctx.prefetch.fs_prefetch_rule_count();
662        let (prefetch_table_offset, prefetch_table_size) =
663            // If blob_table_size equal to 0, there is no prefetch.
664            if fs_prefetch_rule_count > 0 && blob_table_size > 0 {
665                // Prefetch table is very close to blob devices table
666                let offset = blob_table_offset + blob_table_size;
667                // Each prefetched file has is nid of `u32` filled into prefetch table.
668                let size = fs_prefetch_rule_count * size_of::<u32>() as u32;
669                trace!("prefetch table locates at offset {} size {}", offset, size);
670                (offset, size)
671            } else {
672                (0, 0)
673            };
674
675        // Make the superblock's meta_blkaddr one block ahead of the inode table,
676        // to avoid using 0 as root nid.
677        // inode offset = meta_blkaddr * block_size + 32 * nid
678        // When using nid 0 as root nid,
679        // the root directory will not be shown by glibc's getdents/readdir.
680        // Because in some OS, ino == 0 represents corresponding file is deleted.
681        let root_node_offset = self.tree.borrow_mut_node().v6_offset;
682        let orig_meta_addr = root_node_offset - EROFS_BLOCK_SIZE_4096;
683        let meta_addr = if blob_table_size > 0 {
684            align_offset(
685                blob_table_offset + blob_table_size + prefetch_table_size as u64,
686                EROFS_BLOCK_SIZE_4096,
687            )
688        } else {
689            orig_meta_addr
690        };
691        let meta_offset = meta_addr - orig_meta_addr;
692        let root_nid = calculate_nid(root_node_offset + meta_offset, meta_addr);
693
694        // Prepare extended super block
695        let mut ext_sb = RafsV6SuperBlockExt::new();
696        ext_sb.set_compressor(ctx.compressor);
697        ext_sb.set_digester(ctx.digester);
698        ext_sb.set_cipher(ctx.cipher);
699        ext_sb.set_chunk_size(ctx.chunk_size);
700        ext_sb.set_blob_table_offset(blob_table_offset);
701        ext_sb.set_blob_table_size(blob_table_size as u32);
702
703        // collect all chunks in this bootstrap.
704        // HashChunkDict cannot be used here, because there will be duplicate chunks between layers,
705        // but there is no deduplication during the actual construction.
706        // Each layer uses the corresponding chunk in the blob of its own layer.
707        // If HashChunkDict is used here, it will cause duplication. The chunks are removed,
708        // resulting in incomplete chunk info.
709        let mut chunk_cache = BTreeMap::new();
710
711        // Dump bootstrap
712        timing_tracer!(
713            {
714                self.tree.walk_bfs(true, &mut |n| {
715                    n.borrow_mut_node().dump_bootstrap_v6(
716                        ctx,
717                        bootstrap_ctx.writer.as_mut(),
718                        orig_meta_addr,
719                        meta_addr,
720                        &mut chunk_cache,
721                        &blobs,
722                    )
723                })
724            },
725            "dump_bootstrap"
726        )?;
727        Self::v6_align_to_4k(bootstrap_ctx)?;
728
729        // `Node` offset might be updated during above inodes dumping. So `get_prefetch_table` after it.
730        if prefetch_table_size > 0 {
731            let prefetch_table = ctx.prefetch.get_v6_prefetch_table(meta_addr);
732            if let Some(mut pt) = prefetch_table {
733                assert!(pt.len() * size_of::<u32>() <= prefetch_table_size as usize);
734                // Device slots are very close to extended super block.
735                ext_sb.set_prefetch_table_offset(prefetch_table_offset);
736                ext_sb.set_prefetch_table_size(prefetch_table_size);
737                bootstrap_ctx
738                    .writer
739                    .seek_offset(prefetch_table_offset as u64)
740                    .context("failed seek prefetch table offset")?;
741                pt.store(bootstrap_ctx.writer.as_mut()).unwrap();
742            }
743        }
744
745        // TODO: get rid of the chunk info array.
746        // Dump chunk info array.
747        let chunk_table_offset = bootstrap_ctx
748            .writer
749            .seek_to_end()
750            .context("failed to seek to bootstrap's end for chunk table")?;
751        let mut chunk_table_size: u64 = 0;
752        for (_, chunk) in chunk_cache.iter() {
753            let chunk_size = chunk
754                .store(bootstrap_ctx.writer.as_mut())
755                .context("failed to dump chunk table")?;
756            chunk_table_size += chunk_size as u64;
757        }
758        ext_sb.set_chunk_table(chunk_table_offset, chunk_table_size);
759        debug!(
760            "chunk_table offset {} size {}",
761            chunk_table_offset, chunk_table_size
762        );
763        Self::v6_align_to_4k(bootstrap_ctx)?;
764
765        // Prepare device slots.
766        let mut pos = bootstrap_ctx
767            .writer
768            .seek_to_end()
769            .context("failed to seek to bootstrap's end for chunk table")?;
770        assert_eq!(pos % block_size, 0);
771        let mut devtable: Vec<RafsV6Device> = Vec::new();
772        let mut block_count = 0u32;
773        let mut inlined_chunk_digest = true;
774        for entry in blobs.iter() {
775            let mut devslot = RafsV6Device::new();
776            // blob id is String, which is processed by sha256.finalize().
777            if entry.blob_id().is_empty() {
778                bail!(" blob id is empty");
779            } else if entry.blob_id().len() > 64 {
780                bail!(format!(
781                    "blob id length is bigger than 64 bytes, blob id {:?}",
782                    entry.blob_id()
783                ));
784            } else if entry.uncompressed_size() / block_size > u32::MAX as u64 {
785                bail!(format!(
786                    "uncompressed blob size (0x:{:x}) is too big",
787                    entry.uncompressed_size()
788                ));
789            }
790            if !entry.has_feature(BlobFeatures::INLINED_CHUNK_DIGEST) {
791                inlined_chunk_digest = false;
792            }
793            let cnt = (entry.uncompressed_size() / block_size) as u32;
794            if block_count.checked_add(cnt).is_none() {
795                bail!("Too many data blocks in RAFS filesystem, block size 0x{:x}, block count 0x{:x}", block_size, block_count as u64 + cnt as u64);
796            }
797            let mapped_blkaddr = Self::v6_align_mapped_blkaddr(block_size, pos)?;
798            pos = (mapped_blkaddr + cnt) as u64 * block_size;
799            block_count += cnt;
800
801            let id = entry.blob_id();
802            let id = id.as_bytes();
803            let mut blob_id = [0u8; 64];
804            blob_id[..id.len()].copy_from_slice(id);
805            devslot.set_blob_id(&blob_id);
806            devslot.set_blocks(cnt);
807            devslot.set_mapped_blkaddr(mapped_blkaddr);
808            devtable.push(devslot);
809        }
810
811        // Dump super block
812        let mut sb = RafsV6SuperBlock::new();
813        if ctx.conversion_type == ConversionType::TarToTarfs {
814            sb.set_block_bits(EROFS_BLOCK_BITS_9);
815        }
816        sb.set_inos(bootstrap_ctx.get_next_ino() - 1);
817        sb.set_blocks(block_count);
818        sb.set_root_nid(root_nid as u16);
819        sb.set_meta_addr(meta_addr);
820        sb.set_extra_devices(blob_table_entries as u16);
821        bootstrap_ctx.writer.seek(SeekFrom::Start(0))?;
822        sb.store(bootstrap_ctx.writer.as_mut())
823            .context("failed to store SB")?;
824
825        // Dump extended super block.
826        if ctx.explicit_uidgid {
827            ext_sb.set_explicit_uidgid();
828        }
829        if ctx.has_xattr {
830            ext_sb.set_has_xattr();
831        }
832        if inlined_chunk_digest {
833            ext_sb.set_inlined_chunk_digest();
834        }
835        if ctx.conversion_type == ConversionType::TarToTarfs {
836            ext_sb.set_tarfs_mode();
837        }
838        bootstrap_ctx
839            .writer
840            .seek_offset((EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE) as u64)
841            .context("failed to seek for extended super block")?;
842        ext_sb
843            .store(bootstrap_ctx.writer.as_mut())
844            .context("failed to store extended super block")?;
845
846        // Dump device slots.
847        bootstrap_ctx
848            .writer
849            .seek_offset(EROFS_DEVTABLE_OFFSET as u64)
850            .context("failed to seek devtslot")?;
851        for slot in devtable.iter() {
852            slot.store(bootstrap_ctx.writer.as_mut())
853                .context("failed to store device slot")?;
854        }
855
856        // Dump blob table
857        bootstrap_ctx
858            .writer
859            .seek_offset(blob_table_offset as u64)
860            .context("failed seek for extended blob table offset")?;
861        blob_table
862            .store(bootstrap_ctx.writer.as_mut())
863            .context("failed to store extended blob table")?;
864
865        Ok(())
866    }
867
868    fn v6_align_to_4k(bootstrap_ctx: &mut BootstrapContext) -> Result<()> {
869        bootstrap_ctx
870            .writer
871            .flush()
872            .context("failed to flush bootstrap")?;
873        let pos = bootstrap_ctx
874            .writer
875            .seek_to_end()
876            .context("failed to seek to bootstrap's end for chunk table")?;
877        let padding = align_offset(pos, EROFS_BLOCK_SIZE_4096) - pos;
878        bootstrap_ctx
879            .writer
880            .write_all(&WRITE_PADDING_DATA[0..padding as usize])
881            .context("failed to write 0 to padding of bootstrap's end for chunk table")?;
882        bootstrap_ctx
883            .writer
884            .flush()
885            .context("failed to flush bootstrap")?;
886        Ok(())
887    }
888
889    fn v6_align_mapped_blkaddr(block_size: u64, addr: u64) -> Result<u32> {
890        match addr.checked_add(V6_BLOCK_SEG_ALIGNMENT - 1) {
891            None => bail!("address 0x{:x} is too big", addr),
892            Some(v) => {
893                let v = (v & !(V6_BLOCK_SEG_ALIGNMENT - 1)) / block_size;
894                if v > u32::MAX as u64 {
895                    bail!("address 0x{:x} is too big", addr);
896                } else {
897                    Ok(v as u32)
898                }
899            }
900        }
901    }
902}
903
904#[cfg(test)]
905mod tests {
906    use super::*;
907    use crate::{ArtifactStorage, BootstrapContext, Overlay};
908    use nydus_rafs::metadata::layout::v6::{EROFS_INODE_CHUNK_BASED, EROFS_INODE_SLOT_SIZE};
909    use nydus_rafs::metadata::{RafsVersion, RAFS_DEFAULT_CHUNK_SIZE};
910    use std::fs::File;
911    use vmm_sys_util::{tempdir::TempDir, tempfile::TempFile};
912
    #[test]
    fn test_set_v6_offset() {
        // Build a Node from a regular file inside a fresh temp directory.
        let pa = TempDir::new().unwrap();
        let pa_aa = TempFile::new_in(pa.as_path()).unwrap();
        let mut node = Node::from_fs_object(
            RafsVersion::V6,
            pa.as_path().to_path_buf(),
            pa_aa.as_path().to_path_buf(),
            Overlay::UpperAddition,
            RAFS_DEFAULT_CHUNK_SIZE as u32,
            0,
            false,
            false,
        )
        .unwrap();

        // Back the bootstrap context with a single temporary file and start
        // allocating inode metadata from offset 0.
        let bootstrap_path = TempFile::new().unwrap();
        let storage = ArtifactStorage::SingleFile(bootstrap_path.as_path().to_path_buf());
        let mut bootstrap_ctx = BootstrapContext::new(Some(storage), false).unwrap();
        bootstrap_ctx.offset = 0;

        // reg file.
        // "1" is used only for testing purpose, in practice
        // it's always aligned to 32 bytes.
        // A compact inode for a regular file takes one 32-byte slot, so the
        // allocation cursor advances from 0 to 32.
        node.v6_set_offset(&mut bootstrap_ctx, None, EROFS_BLOCK_SIZE_4096)
            .unwrap();
        assert_eq!(node.v6_offset, 0);
        assert_eq!(node.v6_datalayout, EROFS_INODE_CHUNK_BASED);
        assert!(node.v6_compact_inode);
        assert_eq!(bootstrap_ctx.offset, 32);

        // symlink and dir are handled in the same way.
        let mut dir_node = Node::from_fs_object(
            RafsVersion::V6,
            pa.as_path().to_path_buf(),
            pa.as_path().to_path_buf(),
            Overlay::UpperAddition,
            RAFS_DEFAULT_CHUNK_SIZE as u32,
            0,
            false,
            false,
        )
        .unwrap();

        // d_size 4064: 32-byte inode + 4064 bytes of dirents fit exactly in one
        // 4096 block, so the dirent tail is inlined with the inode. The inode is
        // placed at the next block boundary (4096) and the cursor moves to 8192.
        dir_node
            .v6_set_dir_offset(&mut bootstrap_ctx, 4064, EROFS_BLOCK_SIZE_4096)
            .unwrap();
        assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE);
        assert_eq!(dir_node.v6_offset, 4096);
        assert_eq!(bootstrap_ctx.offset, 8192);

        // d_size 4096: dirents occupy a whole block, nothing left to inline, so
        // the layout is FLAT_PLAIN; the inode reuses the 32-byte hole at offset
        // 32 and the dirent block is allocated at 8192.
        dir_node
            .v6_set_dir_offset(&mut bootstrap_ctx, 4096, EROFS_BLOCK_SIZE_4096)
            .unwrap();
        assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_PLAIN);
        assert_eq!(dir_node.v6_offset, 32);
        assert_eq!(dir_node.v6_dirents_offset, 8192);
        assert_eq!(bootstrap_ctx.offset, 8192 + 4096);

        // d_size 8160: one full dirent block plus a 4064-byte tail which, with
        // the 32-byte inode, fills a block exactly -> FLAT_INLINE again.
        dir_node
            .v6_set_dir_offset(&mut bootstrap_ctx, 8160, EROFS_BLOCK_SIZE_4096)
            .unwrap();
        assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE);
        assert_eq!(dir_node.v6_offset, 8192 + 4096);
        assert_eq!(dir_node.v6_dirents_offset, 8192 + 4096 + 4096);
        assert_eq!(bootstrap_ctx.offset, 8192 + 4096 + 8192);

        // d_size 8161: the tail (4065 bytes) plus inode no longer fits in one
        // block, so fall back to FLAT_PLAIN; the inode again fills a small hole.
        dir_node
            .v6_set_dir_offset(&mut bootstrap_ctx, 8161, EROFS_BLOCK_SIZE_4096)
            .unwrap();
        assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_PLAIN);
        assert_eq!(dir_node.v6_offset, 64);
        assert_eq!(dir_node.v6_dirents_offset, 8192 + 4096 + 8192);
        assert_eq!(bootstrap_ctx.offset, 8192 + 4096 + 8192 + 8192);

        // d_size 4096 + 3968: full block + 3968-byte tail; inode goes into an
        // existing hole (96) and only the tail block is newly allocated.
        dir_node
            .v6_set_dir_offset(&mut bootstrap_ctx, 4096 + 3968, EROFS_BLOCK_SIZE_4096)
            .unwrap();
        assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE);
        assert_eq!(dir_node.v6_offset, 96);
        assert_eq!(dir_node.v6_dirents_offset, 8192 + 4096 + 8192 + 8192);
        assert_eq!(bootstrap_ctx.offset, 8192 + 4096 + 8192 + 8192 + 4096);

        // d_size 4096 + 2048: inline tail present but no suitable hole remains,
        // so the inode starts a fresh block and the full dirent block follows.
        dir_node
            .v6_set_dir_offset(&mut bootstrap_ctx, 4096 + 2048, EROFS_BLOCK_SIZE_4096)
            .unwrap();
        assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE);
        assert_eq!(dir_node.v6_offset, 8192 + 4096 + 8192 + 8192 + 4096);
        assert_eq!(
            dir_node.v6_dirents_offset,
            8192 + 4096 + 8192 + 8192 + 4096 + 4096
        );
        assert_eq!(
            bootstrap_ctx.offset,
            8192 + 4096 + 8192 + 8192 + 4096 + 8192
        );

        // d_size 1985: small enough to inline entirely; note the cursor ends
        // unaligned (32 + 1985 + 31) until align_offset is called below.
        dir_node
            .v6_set_dir_offset(&mut bootstrap_ctx, 1985, EROFS_BLOCK_SIZE_4096)
            .unwrap();
        assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE);
        assert_eq!(dir_node.v6_offset, 8192 + 4096 + 8192 + 8192 + 4096 + 8192);
        assert_eq!(
            bootstrap_ctx.offset,
            8192 + 4096 + 8192 + 8192 + 4096 + 8192 + 32 + 1985 + 31
        );

        // After re-aligning the cursor to the 32-byte slot size, a 1984-byte
        // directory fits into a previously created hole.
        bootstrap_ctx.align_offset(EROFS_INODE_SLOT_SIZE as u64);
        dir_node
            .v6_set_dir_offset(&mut bootstrap_ctx, 1984, EROFS_BLOCK_SIZE_4096)
            .unwrap();
        assert_eq!(dir_node.v6_datalayout, EROFS_INODE_FLAT_INLINE);
        assert_eq!(
            dir_node.v6_offset,
            8192 + 4096 + 8192 + 8192 + 4096 + 2048 + 32
        );
        assert_eq!(
            bootstrap_ctx.offset,
            8192 + 4096 + 8192 + 8192 + 4096 + 8192 + round_up(32 + 1985, 32)
        );
    }
1034
    #[test]
    fn test_set_v6_inode_compact() {
        // Verify that a plain regular file gets a compact v6 inode while a
        // `.pyc` file gets an extended (non-compact) one.
        let pa = TempDir::new().unwrap();
        let pa_reg = TempFile::new_in(pa.as_path()).unwrap();
        let pa_pyc = pa.as_path().join("foo.pyc");
        let _ = File::create(&pa_pyc).unwrap();

        let reg_node = Node::from_fs_object(
            RafsVersion::V6,
            pa.as_path().to_path_buf(),
            pa_reg.as_path().to_path_buf(),
            Overlay::UpperAddition,
            RAFS_DEFAULT_CHUNK_SIZE as u32,
            0,
            false,
            false,
        )
        .unwrap();

        // Ordinary regular files default to the compact inode format.
        assert!(reg_node.v6_compact_inode);

        let pyc_node = Node::from_fs_object(
            RafsVersion::V6,
            pa.as_path().to_path_buf(),
            pa_pyc.as_path().to_path_buf(),
            Overlay::UpperAddition,
            RAFS_DEFAULT_CHUNK_SIZE as u32,
            0,
            false,
            false,
        )
        .unwrap();

        // NOTE(review): `.pyc` files are expected to use an extended inode —
        // presumably because compact inodes cannot carry the timestamps Python
        // uses to validate bytecode caches; confirm in Node::from_fs_object.
        assert!(!pyc_node.v6_compact_inode);

        std::fs::remove_file(&pa_pyc).unwrap();
    }
1072}