// nydus_builder/core/context.rs

1// Copyright 2020 Ant Group. All rights reserved.
2//
3// SPDX-License-Identifier: Apache-2.0
4
5//! Struct to maintain context information for the image builder.
6
7use std::any::Any;
8use std::borrow::Cow;
9use std::collections::{HashMap, VecDeque};
10use std::convert::TryFrom;
11use std::fs::{remove_file, rename, File, OpenOptions};
12use std::io::{BufWriter, Cursor, Read, Seek, Write};
13use std::mem::size_of;
14use std::os::unix::fs::FileTypeExt;
15use std::path::{Display, Path, PathBuf};
16use std::result::Result::Ok;
17use std::str::FromStr;
18use std::sync::{Arc, Mutex};
19use std::{fmt, fs};
20
21use anyhow::{anyhow, Context, Error, Result};
22use nydus_utils::crc32;
23use nydus_utils::crypt::{self, Cipher, CipherContext};
24use sha2::{Digest, Sha256};
25use tar::{EntryType, Header};
26use vmm_sys_util::tempfile::TempFile;
27
28use nydus_api::ConfigV2;
29use nydus_rafs::metadata::chunk::ChunkWrapper;
30use nydus_rafs::metadata::layout::v5::RafsV5BlobTable;
31use nydus_rafs::metadata::layout::v6::{
32    RafsV6BlobTable, EROFS_BLOCK_SIZE_4096, EROFS_INODE_SLOT_SIZE,
33};
34use nydus_rafs::metadata::layout::RafsBlobTable;
35use nydus_rafs::metadata::{Inode, RAFS_DEFAULT_CHUNK_SIZE};
36use nydus_rafs::metadata::{RafsSuperFlags, RafsVersion};
37use nydus_rafs::RafsIoWrite;
38use nydus_storage::device::{BlobFeatures, BlobInfo};
39use nydus_storage::factory::BlobFactory;
40use nydus_storage::meta::toc::{TocEntryList, TocLocation};
41use nydus_storage::meta::{
42    toc, BatchContextGenerator, BlobChunkInfoV2Ondisk, BlobCompressionContextHeader,
43    BlobMetaChunkArray, BlobMetaChunkInfo, ZranContextGenerator,
44};
45use nydus_utils::digest::DigestData;
46use nydus_utils::{compress, digest, div_round_up, round_down, try_round_up_4k, BufReaderInfo};
47
48use super::node::ChunkSource;
49use crate::attributes::Attributes;
50use crate::core::tree::TreeNode;
51use crate::{ChunkDict, Feature, Features, HashChunkDict, Prefetch, PrefetchPolicy, WhiteoutSpec};
52
// TODO: select BufWriter capacity by performance testing.
/// Capacity of the `BufWriter` wrapping artifact files (2 << 17 = 256 KiB).
pub const BUF_WRITER_CAPACITY: usize = 2 << 17;
55
/// Filesystem conversion type supported by RAFS builder.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ConversionType {
    /// Build a RAFS image from a local directory.
    DirectoryToRafs,
    /// Build an eStargz image from a local directory.
    DirectoryToStargz,
    /// Build a `tar.gz` archive from a local directory.
    DirectoryToTargz,
    /// Convert an eStargz image into a RAFS image.
    EStargzToRafs,
    /// Convert an eStargz image into a RAFS image referencing the original blob.
    EStargzToRef,
    /// Generate a referencing RAFS image from an eStargz TOC index.
    EStargzIndexToRef,
    /// Convert a `tar.gz` archive into a RAFS image.
    TargzToRafs,
    /// Convert a `tar.gz` archive into an eStargz image.
    TargzToStargz,
    /// Convert a `tar.gz` archive into a RAFS image referencing the original blob.
    TargzToRef,
    /// Convert a plain tar archive into an eStargz image.
    TarToStargz,
    /// Convert a plain tar archive into a RAFS image.
    TarToRafs,
    /// Convert a plain tar archive into a RAFS image referencing the original blob.
    TarToRef,
    /// Convert a plain tar archive into a tarfs image.
    TarToTarfs,
}
73
74impl Default for ConversionType {
75    fn default() -> Self {
76        Self::DirectoryToRafs
77    }
78}
79
80impl FromStr for ConversionType {
81    type Err = Error;
82    fn from_str(s: &str) -> Result<Self> {
83        match s {
84            "dir-rafs" => Ok(Self::DirectoryToRafs),
85            "dir-stargz" => Ok(Self::DirectoryToStargz),
86            "dir-targz" => Ok(Self::DirectoryToTargz),
87            "estargz-rafs" => Ok(Self::EStargzToRafs),
88            "estargz-ref" => Ok(Self::EStargzToRef),
89            "estargztoc-ref" => Ok(Self::EStargzIndexToRef),
90            "targz-rafs" => Ok(Self::TargzToRafs),
91            "targz-stargz" => Ok(Self::TargzToStargz),
92            "targz-ref" => Ok(Self::TargzToRef),
93            "tar-rafs" => Ok(Self::TarToRafs),
94            "tar-stargz" => Ok(Self::TarToStargz),
95            "tar-tarfs" => Ok(Self::TarToTarfs),
96            // kept for backward compatibility
97            "directory" => Ok(Self::DirectoryToRafs),
98            "stargz_index" => Ok(Self::EStargzIndexToRef),
99            _ => Err(anyhow!("invalid conversion type")),
100        }
101    }
102}
103
104impl fmt::Display for ConversionType {
105    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
106        match self {
107            ConversionType::DirectoryToRafs => write!(f, "dir-rafs"),
108            ConversionType::DirectoryToStargz => write!(f, "dir-stargz"),
109            ConversionType::DirectoryToTargz => write!(f, "dir-targz"),
110            ConversionType::EStargzToRafs => write!(f, "estargz-rafs"),
111            ConversionType::EStargzToRef => write!(f, "estargz-ref"),
112            ConversionType::EStargzIndexToRef => write!(f, "estargztoc-ref"),
113            ConversionType::TargzToRafs => write!(f, "targz-rafs"),
114            ConversionType::TargzToStargz => write!(f, "targz-ref"),
115            ConversionType::TargzToRef => write!(f, "targz-ref"),
116            ConversionType::TarToRafs => write!(f, "tar-rafs"),
117            ConversionType::TarToRef => write!(f, "tar-ref"),
118            ConversionType::TarToStargz => write!(f, "tar-stargz"),
119            ConversionType::TarToTarfs => write!(f, "tar-tarfs"),
120        }
121    }
122}
123
124impl ConversionType {
125    /// Check whether the generated image references the original OCI image data.
126    pub fn is_to_ref(&self) -> bool {
127        matches!(
128            self,
129            ConversionType::EStargzToRef
130                | ConversionType::EStargzIndexToRef
131                | ConversionType::TargzToRef
132                | ConversionType::TarToRef
133                | ConversionType::TarToTarfs
134        )
135    }
136}
137
/// Filesystem based storage configuration for artifacts.
#[derive(Debug, Clone)]
pub enum ArtifactStorage {
    // Won't rename user's specification
    SingleFile(PathBuf),
    // Will rename it from tmp file as user didn't specify a name.
    FileDir((PathBuf, String)),
}

impl ArtifactStorage {
    /// Show file path to store the generated artifacts.
    pub fn display(&self) -> Display<'_> {
        let path = match self {
            ArtifactStorage::SingleFile(p) => p,
            ArtifactStorage::FileDir((dir, _)) => dir,
        };
        path.display()
    }

    /// Record `suffix` as the extension for the artifact file name.
    pub fn add_suffix(&mut self, suffix: &str) {
        match self {
            ArtifactStorage::SingleFile(path) => {
                path.set_extension(suffix);
            }
            ArtifactStorage::FileDir((_, ext)) => *ext = suffix.to_string(),
        }
    }
}

impl Default for ArtifactStorage {
    /// Default to a single-file artifact with an empty path.
    fn default() -> Self {
        ArtifactStorage::SingleFile(PathBuf::default())
    }
}
171
172/// ArtifactMemoryWriter provides a writer to allow writing bootstrap
173/// data to a byte slice in memory.
174struct ArtifactMemoryWriter(Cursor<Vec<u8>>);
175
176impl Default for ArtifactMemoryWriter {
177    fn default() -> Self {
178        Self(Cursor::new(Vec::new()))
179    }
180}
181
182impl RafsIoWrite for ArtifactMemoryWriter {
183    fn as_any(&self) -> &dyn Any {
184        &self.0
185    }
186
187    fn as_bytes(&mut self) -> std::io::Result<Cow<'_, [u8]>> {
188        self.0.set_position(0);
189        Ok(Cow::Borrowed(self.0.get_ref().as_slice()))
190    }
191}
192
193impl Seek for ArtifactMemoryWriter {
194    fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
195        self.0.seek(pos)
196    }
197}
198
199impl Write for ArtifactMemoryWriter {
200    fn write(&mut self, bytes: &[u8]) -> std::io::Result<usize> {
201        self.0.write(bytes)
202    }
203
204    fn flush(&mut self) -> std::io::Result<()> {
205        self.0.flush()
206    }
207}
208
209struct ArtifactFileWriter(pub ArtifactWriter);
210
211impl ArtifactFileWriter {
212    pub fn finalize(&mut self, name: Option<String>) -> Result<()> {
213        self.0.finalize(name)
214    }
215}
216
217impl RafsIoWrite for ArtifactFileWriter {
218    fn as_any(&self) -> &dyn Any {
219        &self.0
220    }
221
222    fn finalize(&mut self, name: Option<String>) -> Result<()> {
223        self.0.finalize(name)
224    }
225
226    fn as_bytes(&mut self) -> std::io::Result<Cow<'_, [u8]>> {
227        self.0.file.flush()?;
228        self.0.reader.seek_offset(0)?;
229
230        let mut buf = Vec::new();
231        self.0.reader.read_to_end(&mut buf)?;
232
233        Ok(Cow::Owned(buf))
234    }
235}
236
237impl ArtifactFileWriter {
238    pub fn set_len(&mut self, s: u64) -> std::io::Result<()> {
239        self.0.file.get_mut().set_len(s)
240    }
241}
242
243impl Seek for ArtifactFileWriter {
244    fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
245        self.0.file.seek(pos)
246    }
247}
248
249impl Write for ArtifactFileWriter {
250    fn write(&mut self, bytes: &[u8]) -> std::io::Result<usize> {
251        self.0.write(bytes)
252    }
253
254    fn flush(&mut self) -> std::io::Result<()> {
255        self.0.flush()
256    }
257}
258
/// Trait abstracting over sinks that receive build artifacts (blobs, bootstraps).
pub trait Artifact: Write {
    /// Get the current write position in the artifact.
    fn pos(&self) -> Result<u64>;
    /// Finalize the artifact; `None` indicates the artifact is empty/unneeded.
    fn finalize(&mut self, name: Option<String>) -> Result<()>;
}
263
/// An artifact sink that discards all data while tracking the write position.
#[derive(Default)]
pub struct NoopArtifactWriter {
    // Number of bytes "written" (discarded) so far.
    pos: usize,
}

impl Write for NoopArtifactWriter {
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let len = buf.len();
        self.pos += len;
        Ok(len)
    }

    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
279
impl Artifact for NoopArtifactWriter {
    /// Report how many bytes have been discarded so far.
    fn pos(&self) -> Result<u64> {
        Ok(self.pos as u64)
    }

    /// Nothing to persist for the no-op writer.
    fn finalize(&mut self, _name: Option<String>) -> Result<()> {
        Ok(())
    }
}
289
/// ArtifactWriter provides a writer to allow writing bootstrap
/// or blob data to a single file or in a directory.
pub struct ArtifactWriter {
    // Number of bytes written through `file` so far.
    pos: usize,
    // Buffered writer over the target (or temporary) file.
    file: BufWriter<File>,
    // Independent read handle over the same file, used to read data back.
    reader: File,
    // Where the artifact finally lives; drives `finalize()` behavior.
    storage: ArtifactStorage,
    // Keep this because tmp file will be removed automatically when it is dropped.
    // But we will rename/link the tmp file before it is removed.
    tmp_file: Option<TempFile>,
}
301
302impl Write for ArtifactWriter {
303    fn write(&mut self, bytes: &[u8]) -> std::io::Result<usize> {
304        let n = self.file.write(bytes)?;
305        self.pos += n;
306        Ok(n)
307    }
308
309    fn flush(&mut self) -> std::io::Result<()> {
310        self.file.flush()
311    }
312}
313
impl ArtifactWriter {
    /// Create a new instance of [ArtifactWriter] from a [ArtifactStorage] configuration object.
    pub fn new(storage: ArtifactStorage) -> Result<Self> {
        match storage {
            ArtifactStorage::SingleFile(ref p) => {
                let mut opener = &mut OpenOptions::new();
                opener = opener.write(true).create(true);
                if let Ok(md) = fs::metadata(p) {
                    let ty = md.file_type();
                    // Make it as the writer side of FIFO file, no truncate flag because it has
                    // been created by the reader side.
                    if !ty.is_fifo() {
                        opener = opener.truncate(true);
                    }
                }
                let b = BufWriter::with_capacity(
                    BUF_WRITER_CAPACITY,
                    opener
                        .open(p)
                        .with_context(|| format!("failed to open file {}", p.display()))?,
                );
                // Separate read-only handle so the data can later be read back
                // (see `ArtifactFileWriter::as_bytes`).
                let reader = OpenOptions::new()
                    .read(true)
                    .open(p)
                    .with_context(|| format!("failed to open file {}", p.display()))?;
                Ok(Self {
                    pos: 0,
                    file: b,
                    reader,
                    storage,
                    tmp_file: None,
                })
            }
            ArtifactStorage::FileDir(ref p) => {
                // Better we can use open(2) O_TMPFILE, but for compatibility sake, we delay this job.
                // TODO: Blob dir existence?
                let tmp = TempFile::new_in(&p.0)
                    .with_context(|| format!("failed to create temp file in {}", p.0.display()))?;
                // Clone the handle so writer and TempFile ownership are independent;
                // the TempFile is kept so `finalize()` can rename it into place.
                let tmp2 = tmp.as_file().try_clone()?;
                let reader = OpenOptions::new()
                    .read(true)
                    .open(tmp.as_path())
                    .with_context(|| format!("failed to open file {}", tmp.as_path().display()))?;
                Ok(Self {
                    pos: 0,
                    file: BufWriter::with_capacity(BUF_WRITER_CAPACITY, tmp2),
                    reader,
                    storage,
                    tmp_file: Some(tmp),
                })
            }
        }
    }
}
368
369impl Artifact for ArtifactWriter {
370    /// Get the current write position.
371    fn pos(&self) -> Result<u64> {
372        Ok(self.pos as u64)
373    }
374
375    /// Finalize the metadata/data blob.
376    ///
377    /// When `name` is None, it means that the blob is empty and should be removed.
378    fn finalize(&mut self, name: Option<String>) -> Result<()> {
379        self.file.flush()?;
380
381        if let Some(n) = name {
382            if let ArtifactStorage::FileDir(s) = &self.storage {
383                let mut path = Path::new(&s.0).join(n);
384                if !s.1.is_empty() {
385                    path.set_extension(&s.1);
386                }
387                if !path.exists() {
388                    if let Some(tmp_file) = &self.tmp_file {
389                        rename(tmp_file.as_path(), &path).with_context(|| {
390                            format!(
391                                "failed to rename blob {:?} to {:?}",
392                                tmp_file.as_path(),
393                                path
394                            )
395                        })?;
396                    }
397                }
398            }
399        } else if let ArtifactStorage::SingleFile(s) = &self.storage {
400            if let Ok(md) = s.metadata() {
401                if md.is_file() {
402                    remove_file(s).with_context(|| format!("failed to remove blob {:?}", s))?;
403                }
404            }
405        }
406
407        Ok(())
408    }
409}
410
/// Generator that writes uncompressed blob data and blob meta to two
/// separate cache artifacts, each guarded by a mutex.
pub struct BlobCacheGenerator {
    // Writer for the `<name>.blob.data` artifact.
    blob_data: Mutex<ArtifactFileWriter>,
    // Writer for the `<name>.blob.meta` artifact.
    blob_meta: Mutex<ArtifactFileWriter>,
}
415
416impl BlobCacheGenerator {
417    pub fn new(storage: ArtifactStorage) -> Result<Self> {
418        Ok(BlobCacheGenerator {
419            blob_data: Mutex::new(ArtifactFileWriter(ArtifactWriter::new(storage.clone())?)),
420            blob_meta: Mutex::new(ArtifactFileWriter(ArtifactWriter::new(storage)?)),
421        })
422    }
423
424    pub fn write_blob_meta(
425        &self,
426        data: &[u8],
427        header: &BlobCompressionContextHeader,
428    ) -> Result<()> {
429        let mut guard = self.blob_meta.lock().unwrap();
430        let aligned_uncompressed_size = try_round_up_4k(data.len() as u64).ok_or(anyhow!(
431            format!("invalid input {} for try_round_up_4k", data.len())
432        ))?;
433        guard.set_len(
434            aligned_uncompressed_size + size_of::<BlobCompressionContextHeader>() as u64,
435        )?;
436        guard
437            .write_all(data)
438            .context("failed to write blob meta data")?;
439        guard.seek(std::io::SeekFrom::Start(aligned_uncompressed_size))?;
440        guard
441            .write_all(header.as_bytes())
442            .context("failed to write blob meta header")?;
443        Ok(())
444    }
445
446    pub fn write_blob_data(
447        &self,
448        chunk_data: &[u8],
449        chunk_info: &ChunkWrapper,
450        aligned_d_size: u32,
451    ) -> Result<()> {
452        let mut guard = self.blob_data.lock().unwrap();
453        let curr_pos = guard.seek(std::io::SeekFrom::End(0))?;
454        if curr_pos < chunk_info.uncompressed_offset() + aligned_d_size as u64 {
455            guard.set_len(chunk_info.uncompressed_offset() + aligned_d_size as u64)?;
456        }
457
458        guard.seek(std::io::SeekFrom::Start(chunk_info.uncompressed_offset()))?;
459        guard
460            .write_all(&chunk_data)
461            .context("failed to write blob cache")?;
462        Ok(())
463    }
464
465    pub fn finalize(&self, name: &str) -> Result<()> {
466        let blob_data_name = format!("{}.blob.data", name);
467        let mut guard = self.blob_data.lock().unwrap();
468        guard.finalize(Some(blob_data_name))?;
469        drop(guard);
470
471        let blob_meta_name = format!("{}.blob.meta", name);
472        let mut guard = self.blob_meta.lock().unwrap();
473        guard.finalize(Some(blob_meta_name))
474    }
475}
476
#[derive(Clone)]
/// BlobContext is used to hold the blob information of a layer during build.
pub struct BlobContext {
    /// Blob id (user specified or sha256(blob)).
    pub blob_id: String,
    /// Running SHA256 over all data written to the blob (see `write_data`).
    pub blob_hash: Sha256,
    /// Compression algorithm applied to blob chunks.
    pub blob_compressor: compress::Algorithm,
    /// Digest algorithm used for chunk/blob digests.
    pub blob_digester: digest::Algorithm,
    /// Encryption algorithm for the blob.
    pub blob_cipher: crypt::Algorithm,
    /// Amount of blob data to prefetch; reset by `set_blob_prefetch_size`.
    pub blob_prefetch_size: u64,
    /// Whether to generate blob metadata information.
    pub blob_meta_info_enabled: bool,
    /// Data chunks stored in the data blob, for v6.
    pub blob_meta_info: BlobMetaChunkArray,
    /// Blob metadata header stored in the data blob, for v6
    pub blob_meta_header: BlobCompressionContextHeader,
    /// Blob chunk digest array.
    pub blob_chunk_digest: Vec<DigestData>,

    /// Final compressed blob file size.
    pub compressed_blob_size: u64,
    /// Final expected blob cache file size.
    pub uncompressed_blob_size: u64,

    /// Current blob offset cursor for writing to disk file.
    pub current_compressed_offset: u64,
    /// Current uncompressed offset cursor.
    pub current_uncompressed_offset: u64,

    /// The number of counts in a blob by the index of blob table.
    pub chunk_count: u32,
    /// Chunk slice size.
    pub chunk_size: u32,
    /// Whether the blob is from chunk dict.
    pub chunk_source: ChunkSource,

    // SHA256 digest of blob ToC content, including the toc tar header.
    // It's all zero for blobs with inlined-meta.
    pub blob_toc_digest: [u8; 32],
    // SHA256 digest of RAFS blob for ZRAN, containing `blob.meta`, `blob.digest` `blob.toc` and
    // optionally 'image.boot`. It's all zero for ZRAN blobs with inlined-meta, so need special
    // handling.
    pub blob_meta_digest: [u8; 32],
    // Size of RAFS blob for ZRAN. It's zero ZRAN blobs with inlined-meta.
    pub blob_meta_size: u64,
    // Size of blob ToC content, it's zero for blobs with inlined-meta.
    pub blob_toc_size: u32,

    /// ToC entries recorded for the blob.
    pub entry_list: toc::TocEntryList,
    /// Cipher to encrypt the RAFS blobs.
    pub cipher_object: Arc<Cipher>,
    /// Extra context for the cipher, if any.
    pub cipher_ctx: Option<CipherContext>,

    /// Whether the blob is from external storage backend.
    pub external: bool,
}
532
533impl BlobContext {
534    /// Create a new instance of [BlobContext].
535    #[allow(clippy::too_many_arguments)]
536    pub fn new(
537        blob_id: String,
538        blob_offset: u64,
539        features: BlobFeatures,
540        compressor: compress::Algorithm,
541        digester: digest::Algorithm,
542        cipher: crypt::Algorithm,
543        cipher_object: Arc<Cipher>,
544        cipher_ctx: Option<CipherContext>,
545        external: bool,
546    ) -> Self {
547        let blob_meta_info = if features.contains(BlobFeatures::CHUNK_INFO_V2) {
548            BlobMetaChunkArray::new_v2()
549        } else {
550            BlobMetaChunkArray::new_v1()
551        };
552        let mut blob_ctx = Self {
553            blob_id,
554            blob_hash: Sha256::new(),
555            blob_compressor: compressor,
556            blob_digester: digester,
557            blob_cipher: cipher,
558            blob_prefetch_size: 0,
559            blob_meta_info_enabled: false,
560            blob_meta_info,
561            blob_meta_header: BlobCompressionContextHeader::default(),
562            blob_chunk_digest: Vec::new(),
563
564            compressed_blob_size: 0,
565            uncompressed_blob_size: 0,
566
567            current_compressed_offset: blob_offset,
568            current_uncompressed_offset: 0,
569
570            chunk_count: 0,
571            chunk_size: RAFS_DEFAULT_CHUNK_SIZE as u32,
572            chunk_source: ChunkSource::Build,
573
574            blob_toc_digest: [0u8; 32],
575            blob_meta_digest: [0u8; 32],
576            blob_meta_size: 0,
577            blob_toc_size: 0,
578
579            entry_list: toc::TocEntryList::new(),
580            cipher_object,
581            cipher_ctx,
582
583            external,
584        };
585
586        blob_ctx
587            .blob_meta_header
588            .set_aligned(features.contains(BlobFeatures::ALIGNED));
589        blob_ctx
590            .blob_meta_header
591            .set_inlined_fs_meta(features.contains(BlobFeatures::INLINED_FS_META));
592        blob_ctx
593            .blob_meta_header
594            .set_chunk_info_v2(features.contains(BlobFeatures::CHUNK_INFO_V2));
595        blob_ctx
596            .blob_meta_header
597            .set_ci_batch(features.contains(BlobFeatures::BATCH));
598        blob_ctx
599            .blob_meta_header
600            .set_ci_zran(features.contains(BlobFeatures::ZRAN));
601        blob_ctx
602            .blob_meta_header
603            .set_separate_blob(features.contains(BlobFeatures::SEPARATE));
604        blob_ctx
605            .blob_meta_header
606            .set_inlined_chunk_digest(features.contains(BlobFeatures::INLINED_CHUNK_DIGEST));
607        blob_ctx
608            .blob_meta_header
609            .set_has_tar_header(features.contains(BlobFeatures::HAS_TAR_HEADER));
610        blob_ctx
611            .blob_meta_header
612            .set_has_toc(features.contains(BlobFeatures::HAS_TOC));
613        blob_ctx
614            .blob_meta_header
615            .set_cap_tar_toc(features.contains(BlobFeatures::CAP_TAR_TOC));
616        blob_ctx
617            .blob_meta_header
618            .set_tarfs(features.contains(BlobFeatures::TARFS));
619        blob_ctx
620            .blob_meta_header
621            .set_encrypted(features.contains(BlobFeatures::ENCRYPTED));
622        blob_ctx
623            .blob_meta_header
624            .set_is_chunkdict_generated(features.contains(BlobFeatures::IS_CHUNKDICT_GENERATED));
625        blob_ctx
626            .blob_meta_header
627            .set_external(features.contains(BlobFeatures::EXTERNAL));
628
629        blob_ctx
630    }
631
    /// Create a new instance of [BlobContext] from `BlobInfo` object.
    pub fn from(ctx: &BuildContext, blob: &BlobInfo, chunk_source: ChunkSource) -> Result<Self> {
        let mut compressed_blob_size = blob.compressed_size();
        let mut blob_meta_size = blob.blob_meta_size();
        let mut toc_size = blob.blob_toc_size();
        let mut blob_meta_digest = blob.blob_meta_digest().to_owned();
        let mut toc_digest = blob.blob_toc_digest().to_owned();
        let mut blob_id = blob.raw_blob_id().to_string();
        let mut features = blob.features();

        // Fixes up blob info objects from inlined-meta blobs.
        if chunk_source == ChunkSource::Dict || chunk_source == ChunkSource::Parent {
            if features.contains(BlobFeatures::INLINED_FS_META) {
                // The derived blob no longer inlines the filesystem meta.
                features &= !BlobFeatures::INLINED_FS_META;

                if !features.contains(BlobFeatures::SEPARATE) {
                    blob_id = blob.blob_id();
                }

                if ctx.configuration.internal.blob_accessible() {
                    // The blob is reachable through the configured storage backend,
                    // so recover the real sizes/digests by reading it back.
                    let backend_config = ctx.configuration.get_backend_config().map_err(|e| {
                        anyhow!("failed to get backend storage configuration, {}", e)
                    })?;
                    let blob_mgr = BlobFactory::new_backend(backend_config, "fix-inlined-meta")?;

                    if features.contains(BlobFeatures::SEPARATE) {
                        // RAFS meta lives in a separate blob addressed by its digest.
                        if let Ok(digest) = blob.get_blob_meta_id() {
                            let reader = blob_mgr.get_reader(&digest).map_err(|e| {
                                anyhow!("failed to get reader for blob {}, {}", digest, e)
                            })?;
                            let size = reader
                                .blob_size()
                                .map_err(|e| anyhow!("failed to get blob size, {:?}", e))?;
                            if let Ok(v) = hex::decode(digest) {
                                if v.len() == 32 {
                                    blob_meta_digest.copy_from_slice(&v[..32]);
                                    blob_meta_size = size;
                                }
                            }
                            if blob.has_feature(BlobFeatures::HAS_TOC) {
                                // Best-effort: on ToC read failure keep the defaults.
                                if let Ok(toc) = TocEntryList::read_from_blob::<File>(
                                    reader.as_ref(),
                                    None,
                                    &TocLocation::default(),
                                ) {
                                    toc_digest = toc.toc_digest().data;
                                    toc_size = toc.toc_size();
                                }
                            }
                        }
                    } else {
                        let reader = blob_mgr.get_reader(&blob_id).map_err(|e| {
                            anyhow!("failed to get reader for blob {}, {}", blob_id, e)
                        })?;
                        compressed_blob_size = reader
                            .blob_size()
                            .map_err(|e| anyhow!("failed to get blob size, {:?}", e))?;
                        if blob.has_feature(BlobFeatures::HAS_TOC) {
                            // Best-effort: on ToC read failure keep the defaults.
                            if let Ok(toc) = TocEntryList::read_from_blob::<File>(
                                reader.as_ref(),
                                None,
                                &TocLocation::default(),
                            ) {
                                toc_digest = toc.toc_digest().data;
                                toc_size = toc.toc_size();
                            }
                        }
                    }
                } else if features.contains(BlobFeatures::SEPARATE) {
                    // Backend not accessible: recover only what the meta id encodes.
                    if let Ok(digest) = blob.get_blob_meta_id() {
                        if let Ok(v) = hex::decode(digest) {
                            if v.len() == 32 {
                                blob_meta_digest.copy_from_slice(&v[..32]);
                            }
                        }
                    }
                }
            } else if !blob.has_feature(BlobFeatures::CAP_TAR_TOC)
                && !ctx.configuration.internal.blob_accessible()
            {
                blob_id = blob.blob_id();
            }
        }

        let (cipher, cipher_object, cipher_ctx) = blob.get_cipher_info();

        let mut blob_ctx = Self::new(
            blob_id,
            0,
            features,
            blob.compressor(),
            blob.digester(),
            cipher,
            cipher_object,
            cipher_ctx,
            false,
        );
        // Carry over accounting data from the existing blob info.
        blob_ctx.blob_prefetch_size = blob.prefetch_size();
        blob_ctx.chunk_count = blob.chunk_count();
        blob_ctx.uncompressed_blob_size = blob.uncompressed_size();
        blob_ctx.compressed_blob_size = compressed_blob_size;
        blob_ctx.chunk_size = blob.chunk_size();
        blob_ctx.chunk_source = chunk_source;
        blob_ctx.blob_meta_digest = blob_meta_digest;
        blob_ctx.blob_meta_size = blob_meta_size;
        blob_ctx.blob_toc_digest = toc_digest;
        blob_ctx.blob_toc_size = toc_size;

        if blob.meta_ci_is_valid() {
            // Mirror the existing compression-info layout into the new header.
            blob_ctx
                .blob_meta_header
                .set_ci_compressor(blob.meta_ci_compressor());
            blob_ctx.blob_meta_header.set_ci_entries(blob.chunk_count());
            blob_ctx
                .blob_meta_header
                .set_ci_compressed_offset(blob.meta_ci_offset());
            blob_ctx
                .blob_meta_header
                .set_ci_compressed_size(blob.meta_ci_compressed_size());
            blob_ctx
                .blob_meta_header
                .set_ci_uncompressed_size(blob.meta_ci_uncompressed_size());
            blob_ctx.blob_meta_info_enabled = true;
        }

        Ok(blob_ctx)
    }
759
    /// Set chunk size for the blob.
    ///
    /// `chunk_size` is the slice size used when splitting files into chunks.
    pub fn set_chunk_size(&mut self, chunk_size: u32) {
        self.chunk_size = chunk_size;
    }
764
765    // TODO: check the logic to reset prefetch size
766    pub fn set_blob_prefetch_size(&mut self, ctx: &BuildContext) {
767        if (self.uncompressed_blob_size > 0
768            || (ctx.conversion_type == ConversionType::EStargzIndexToRef
769                && !self.blob_id.is_empty()))
770            && ctx.prefetch.policy != PrefetchPolicy::Blob
771        {
772            self.blob_prefetch_size = 0;
773        }
774    }
775
    /// Enable or disable generation of blob metadata information.
    pub fn set_meta_info_enabled(&mut self, enable: bool) {
        self.blob_meta_info_enabled = enable;
    }
779
    /// Set the cipher object and optional cipher context used to encrypt the blob.
    pub fn set_cipher_info(
        &mut self,
        cipher_object: Arc<Cipher>,
        cipher_ctx: Option<CipherContext>,
    ) {
        self.cipher_object = cipher_object;
        self.cipher_ctx = cipher_ctx;
    }
788
789    pub fn add_chunk_meta_info(
790        &mut self,
791        chunk: &ChunkWrapper,
792        chunk_info: Option<BlobChunkInfoV2Ondisk>,
793    ) -> Result<()> {
794        if self.blob_meta_info_enabled {
795            assert_eq!(chunk.index() as usize, self.blob_meta_info.len());
796            match &self.blob_meta_info {
797                BlobMetaChunkArray::V1(_) => {
798                    self.blob_meta_info.add_v1(
799                        chunk.compressed_offset(),
800                        chunk.compressed_size(),
801                        chunk.uncompressed_offset(),
802                        chunk.uncompressed_size(),
803                    );
804                    self.blob_chunk_digest.push(chunk.id().data);
805                }
806                BlobMetaChunkArray::V2(_) => {
807                    if let Some(mut info) = chunk_info {
808                        info.set_uncompressed_offset(chunk.uncompressed_offset());
809                        self.blob_meta_info.add_v2_info(info);
810                    } else {
811                        let mut data: u64 = 0;
812                        if chunk.has_crc32() {
813                            data = chunk.crc32() as u64;
814                        }
815                        self.blob_meta_info.add_v2(
816                            chunk.compressed_offset(),
817                            chunk.compressed_size(),
818                            chunk.uncompressed_offset(),
819                            chunk.uncompressed_size(),
820                            chunk.is_compressed(),
821                            chunk.is_encrypted(),
822                            chunk.has_crc32(),
823                            chunk.is_batch(),
824                            data,
825                        );
826                    }
827                    self.blob_chunk_digest.push(chunk.id().data);
828                }
829            }
830        }
831
832        Ok(())
833    }
834
835    /// Allocate a count index sequentially in a blob.
836    pub fn alloc_chunk_index(&mut self) -> Result<u32> {
837        let index = self.chunk_count;
838
839        // Rafs v6 only supports 24 bit chunk id.
840        if index >= 0xff_ffff {
841            Err(Error::msg(
842                "the number of chunks in blob exceeds the u32 limit",
843            ))
844        } else {
845            self.chunk_count += 1;
846            Ok(index)
847        }
848    }
849
850    /// Get blob id if the blob has some chunks.
851    pub fn blob_id(&self) -> Option<String> {
852        if self.uncompressed_blob_size > 0 {
853            Some(self.blob_id.to_string())
854        } else {
855            None
856        }
857    }
858
    /// Helper to write data to blob and update blob hash.
    ///
    /// The bytes are appended to `blob_writer` first; the running blob digest
    /// is only folded in after the write succeeds, so a failed write leaves
    /// the digest untouched.
    pub fn write_data(&mut self, blob_writer: &mut dyn Artifact, data: &[u8]) -> Result<()> {
        blob_writer.write_all(data)?;
        self.blob_hash.update(data);
        Ok(())
    }
865
866    /// Helper to write a tar header to blob and update blob hash.
867    pub fn write_tar_header(
868        &mut self,
869        blob_writer: &mut dyn Artifact,
870        name: &str,
871        size: u64,
872    ) -> Result<Header> {
873        // The `inline-bootstrap` option merges the blob and bootstrap into one
874        // file. We need some header to index the location of the blob and bootstrap,
875        // write_tar_header uses tar header that arranges the data as follows:
876        // data | tar_header | data | tar_header
877        // This is a tar-like structure, except that we put the tar header after the
878        // data. The advantage is that we do not need to determine the size of the data
879        // first, so that we can write the blob data by stream without seek to improve
880        // the performance of the blob dump by using fifo.
881
882        let mut header = Header::new_gnu();
883        header.set_path(Path::new(name))?;
884        header.set_entry_type(EntryType::Regular);
885        header.set_size(size);
886        // The checksum must be set to ensure that the tar reader implementation
887        // in golang can correctly parse the header.
888        header.set_cksum();
889
890        blob_writer.write_all(header.as_bytes())?;
891        self.blob_hash.update(header.as_bytes());
892        Ok(header)
893    }
894
895    /// Get offset of compressed blob, since current_compressed_offset
896    /// is always >= compressed_blob_size, we can safely subtract here.
897    pub fn compressed_offset(&self) -> u64 {
898        assert!(self.current_compressed_offset >= self.compressed_blob_size);
899        self.current_compressed_offset - self.compressed_blob_size
900    }
901}
902
/// BlobManager stores all blob related information during build.
pub struct BlobManager {
    /// Blob contexts for all layers, indexed by blob index.
    ///
    /// NOTE(review): an earlier comment here claimed metadata-only layers get
    /// an `Option` entry and suggested `iter().flatten()`, but the field is a
    /// plain `Vec<BlobContext>` — the comment looked stale and was rewritten;
    /// confirm against callers whether every layer gets an entry.
    blobs: Vec<BlobContext>,
    /// Index into `blobs` of the blob currently being written, if any.
    current_blob_index: Option<u32>,
    /// Chunk dictionary to hold chunks from an extra chunk dict file.
    /// Used for chunk data de-duplication within the whole image.
    pub(crate) global_chunk_dict: Arc<dyn ChunkDict>,
    /// Chunk dictionary to hold chunks from all layers.
    /// Used for chunk data de-duplication between layers (with `--parent-bootstrap`)
    /// or within layer (with `--inline-bootstrap`).
    pub(crate) layered_chunk_dict: HashChunkDict,
    // Whether the managed blobs are from an external storage backend.
    pub external: bool,
}
922
923impl BlobManager {
924    /// Create a new instance of [BlobManager].
925    pub fn new(digester: digest::Algorithm, external: bool) -> Self {
926        Self {
927            blobs: Vec::new(),
928            current_blob_index: None,
929            global_chunk_dict: Arc::new(()),
930            layered_chunk_dict: HashChunkDict::new(digester),
931            external,
932        }
933    }
934
    /// Set current blob index
    // NOTE(review): `index as u32` would silently truncate for indices above
    // u32::MAX — unreachable in practice given the 256-blob limit, but worth
    // confirming callers never pass larger values.
    pub fn set_current_blob_index(&mut self, index: usize) {
        self.current_blob_index = Some(index as u32)
    }
939
940    pub fn new_blob_ctx(&self, ctx: &BuildContext) -> Result<BlobContext> {
941        let (cipher_object, cipher_ctx) = match ctx.cipher {
942            crypt::Algorithm::None => (Default::default(), None),
943            crypt::Algorithm::Aes128Xts => {
944                let key = crypt::Cipher::generate_random_key(ctx.cipher)?;
945                let iv = crypt::Cipher::generate_random_iv()?;
946                let cipher_ctx = CipherContext::new(key, iv, false, ctx.cipher)?;
947                (
948                    ctx.cipher.new_cipher().ok().unwrap_or_default(),
949                    Some(cipher_ctx),
950                )
951            }
952            _ => {
953                return Err(anyhow!(format!(
954                    "cipher algorithm {:?} does not support",
955                    ctx.cipher
956                )))
957            }
958        };
959        let mut blob_features = ctx.blob_features;
960        let mut compressor = ctx.compressor;
961        if self.external {
962            blob_features.insert(BlobFeatures::EXTERNAL);
963            compressor = compress::Algorithm::None;
964        }
965        let mut blob_ctx = BlobContext::new(
966            ctx.blob_id.clone(),
967            ctx.blob_offset,
968            blob_features,
969            compressor,
970            ctx.digester,
971            ctx.cipher,
972            Arc::new(cipher_object),
973            cipher_ctx,
974            self.external,
975        );
976        blob_ctx.set_chunk_size(ctx.chunk_size);
977        blob_ctx.set_meta_info_enabled(
978            ctx.fs_version == RafsVersion::V6 && ctx.conversion_type != ConversionType::TarToTarfs,
979        );
980
981        Ok(blob_ctx)
982    }
983
984    /// Get the current blob object or create one if no current blob available.
985    pub fn get_or_create_current_blob(
986        &mut self,
987        ctx: &BuildContext,
988    ) -> Result<(u32, &mut BlobContext)> {
989        if self.current_blob_index.is_none() {
990            let blob_ctx = self.new_blob_ctx(ctx)?;
991            self.current_blob_index = Some(self.alloc_index()?);
992            self.add_blob(blob_ctx);
993        }
994        // Safe to unwrap because the blob context has been added.
995        Ok(self.get_current_blob().unwrap())
996    }
997
998    pub fn get_or_create_blob_by_idx(
999        &mut self,
1000        ctx: &BuildContext,
1001        blob_idx: u32,
1002    ) -> Result<(u32, &mut BlobContext)> {
1003        let blob_idx = blob_idx as usize;
1004        if blob_idx >= self.blobs.len() {
1005            for _ in self.blobs.len()..=blob_idx {
1006                let blob_ctx = self.new_blob_ctx(ctx)?;
1007                self.add_blob(blob_ctx);
1008            }
1009        }
1010        Ok((blob_idx as u32, &mut self.blobs[blob_idx as usize]))
1011    }
1012
1013    /// Get the current blob object.
1014    pub fn get_current_blob(&mut self) -> Option<(u32, &mut BlobContext)> {
1015        if let Some(idx) = self.current_blob_index {
1016            Some((idx, &mut self.blobs[idx as usize]))
1017        } else {
1018            None
1019        }
1020    }
1021
1022    /// Get or cerate blob for chunkdict, this is used for chunk deduplication.
1023    pub fn get_or_cerate_blob_for_chunkdict(
1024        &mut self,
1025        ctx: &BuildContext,
1026        id: &str,
1027    ) -> Result<(u32, &mut BlobContext)> {
1028        let blob_mgr = Self::new(ctx.digester, false);
1029        if self.get_blob_idx_by_id(id).is_none() {
1030            let blob_ctx = blob_mgr.new_blob_ctx(ctx)?;
1031            self.current_blob_index = Some(self.alloc_index()?);
1032            self.add_blob(blob_ctx);
1033        } else {
1034            self.current_blob_index = self.get_blob_idx_by_id(id);
1035        }
1036        let (_, blob_ctx) = self.get_current_blob().unwrap();
1037        if blob_ctx.blob_id.is_empty() {
1038            blob_ctx.blob_id = id.to_string();
1039        }
1040        // Safe to unwrap because the blob context has been added.
1041        Ok(self.get_current_blob().unwrap())
1042    }
1043
1044    /// Determine if the given blob has been created.
1045    pub fn has_blob(&self, blob_id: &str) -> bool {
1046        self.get_blob_idx_by_id(blob_id).is_some()
1047    }
1048
    /// Set the global chunk dictionary for chunk deduplication.
    ///
    /// Replaces the default empty dictionary installed by `new()`.
    pub fn set_chunk_dict(&mut self, dict: Arc<dyn ChunkDict>) {
        self.global_chunk_dict = dict
    }
1053
1054    /// Get the global chunk dictionary for chunk deduplication.
1055    pub fn get_chunk_dict(&self) -> Arc<dyn ChunkDict> {
1056        self.global_chunk_dict.clone()
1057    }
1058
1059    /// Allocate a blob index sequentially.
1060    ///
1061    /// This should be paired with Self::add() and keep in consistence.
1062    pub fn alloc_index(&self) -> Result<u32> {
1063        // Rafs v6 only supports 256 blobs.
1064        u8::try_from(self.blobs.len())
1065            .map(|v| v as u32)
1066            .with_context(|| Error::msg("too many blobs"))
1067    }
1068
    /// Get number of blobs managed by the manager.
    pub fn len(&self) -> usize {
        self.blobs.len()
    }
1073
    /// Check whether there's managed blobs.
    pub fn is_empty(&self) -> bool {
        self.blobs.is_empty()
    }
1078
    /// Add a blob context to manager
    ///
    /// This should be paired with Self::alloc_index() and keep in consistence.
    pub fn add_blob(&mut self, blob_ctx: BlobContext) {
        self.blobs.push(blob_ctx);
    }
1085
    /// Get all blob contexts (include the blob context that does not have a blob).
    pub fn get_blobs(&self) -> Vec<&BlobContext> {
        self.blobs.iter().collect()
    }
1090
    /// Get the blob context at `idx`, or `None` if out of range.
    pub fn get_blob(&self, idx: usize) -> Option<&BlobContext> {
        self.blobs.get(idx)
    }
1094
    /// Remove and return the blob context at `idx`, shifting later blobs down.
    ///
    /// Panics if `idx` is out of bounds (behavior of `Vec::remove`).
    pub fn take_blob(&mut self, idx: usize) -> BlobContext {
        self.blobs.remove(idx)
    }
1098
    /// Get the most recently added blob context, if any.
    pub fn get_last_blob(&self) -> Option<&BlobContext> {
        self.blobs.last()
    }
1102
1103    pub fn get_blob_idx_by_id(&self, id: &str) -> Option<u32> {
1104        for (idx, blob) in self.blobs.iter().enumerate() {
1105            if blob.blob_id.eq(id) {
1106                return Some(idx as u32);
1107            }
1108        }
1109        None
1110    }
1111
1112    pub fn get_blob_ids(&self) -> Vec<String> {
1113        self.blobs.iter().map(|b| b.blob_id.to_owned()).collect()
1114    }
1115
1116    /// Prepend all blobs from `blob_table` to the blob manager.
1117    pub fn extend_from_blob_table(
1118        &mut self,
1119        ctx: &BuildContext,
1120        blob_table: Vec<Arc<BlobInfo>>,
1121    ) -> Result<()> {
1122        let mut blobs: Vec<BlobContext> = Vec::new();
1123        for blob in blob_table.iter() {
1124            let ctx = BlobContext::from(ctx, blob.as_ref(), ChunkSource::Parent)?;
1125            blobs.push(ctx);
1126        }
1127        if let Some(curr) = self.current_blob_index {
1128            self.current_blob_index = Some(curr + blobs.len() as u32);
1129            blobs.append(&mut self.blobs);
1130        } else {
1131            assert!(self.blobs.is_empty());
1132        }
1133        self.blobs = blobs;
1134        Ok(())
1135    }
1136
1137    /// Import all blobs from the global chunk dictionary for later chunk deduplication.
1138    ///
1139    /// The order to import blobs from parent bootstrap and chunk dictionary is important.
1140    /// All blobs from parent bootstrap must be imported first, otherwise we need to fix blob index
1141    /// of chunks from parent bootstrap.
1142    pub fn extend_from_chunk_dict(&mut self, ctx: &BuildContext) -> Result<()> {
1143        let blobs = self.global_chunk_dict.get_blobs();
1144
1145        for blob in blobs.iter() {
1146            if let Some(real_idx) = self.get_blob_idx_by_id(&blob.blob_id()) {
1147                self.global_chunk_dict
1148                    .set_real_blob_idx(blob.blob_index(), real_idx);
1149            } else {
1150                let idx = self.alloc_index()?;
1151                let ctx = BlobContext::from(ctx, blob.as_ref(), ChunkSource::Dict)?;
1152                self.add_blob(ctx);
1153                self.global_chunk_dict
1154                    .set_real_blob_idx(blob.blob_index(), idx);
1155            }
1156        }
1157
1158        Ok(())
1159    }
1160
    /// Generate a [RafsBlobTable] from all blobs managed by the manager.
    ///
    /// The table variant (V5/V6) follows `build_ctx.fs_version`; every managed
    /// blob contributes one entry in blob-index order.
    pub fn to_blob_table(&self, build_ctx: &BuildContext) -> Result<RafsBlobTable> {
        let mut blob_table = match build_ctx.fs_version {
            RafsVersion::V5 => RafsBlobTable::V5(RafsV5BlobTable::new()),
            RafsVersion::V6 => RafsBlobTable::V6(RafsV6BlobTable::new()),
        };

        for ctx in &self.blobs {
            let blob_id = ctx.blob_id.clone();
            // Prefetch size is stored as u64 in the context but u32 on disk.
            let blob_prefetch_size = u32::try_from(ctx.blob_prefetch_size)?;
            let chunk_count = ctx.chunk_count;
            let decompressed_blob_size = ctx.uncompressed_blob_size;
            let compressed_blob_size = ctx.compressed_blob_size;
            let mut flags = RafsSuperFlags::empty();
            match &mut blob_table {
                RafsBlobTable::V5(table) => {
                    // V5 recovers the feature bits from the on-disk meta header.
                    let blob_features = BlobFeatures::from_bits(ctx.blob_meta_header.features())
                        .ok_or_else(|| anyhow!("invalid blob features"))?;
                    flags |= RafsSuperFlags::from(ctx.blob_compressor);
                    flags |= RafsSuperFlags::from(ctx.blob_digester);
                    table.add(
                        blob_id,
                        0,
                        blob_prefetch_size,
                        ctx.chunk_size,
                        chunk_count,
                        decompressed_blob_size,
                        compressed_blob_size,
                        blob_features,
                        flags,
                        build_ctx.is_chunkdict_generated,
                    );
                }
                RafsBlobTable::V6(table) => {
                    // V6 additionally records the cipher flag plus meta/TOC
                    // digests and sizes.
                    flags |= RafsSuperFlags::from(ctx.blob_compressor);
                    flags |= RafsSuperFlags::from(ctx.blob_digester);
                    flags |= RafsSuperFlags::from(ctx.blob_cipher);
                    table.add(
                        blob_id,
                        0,
                        blob_prefetch_size,
                        ctx.chunk_size,
                        chunk_count,
                        decompressed_blob_size,
                        compressed_blob_size,
                        flags,
                        ctx.blob_meta_digest,
                        ctx.blob_toc_digest,
                        ctx.blob_meta_size,
                        ctx.blob_toc_size,
                        build_ctx.is_chunkdict_generated,
                        ctx.blob_meta_header,
                        ctx.cipher_object.clone(),
                        ctx.cipher_ctx.clone(),
                    );
                }
            }
        }

        Ok(blob_table)
    }
1222}
1223
/// BootstrapContext is used to hold in memory data of bootstrap during build.
pub struct BootstrapContext {
    /// This build has a parent bootstrap.
    pub layered: bool,
    /// Cache node index for hardlinks, HashMap<(layer_index, real_inode, dev), Vec<TreeNode>>.
    pub(crate) inode_map: HashMap<(u16, Inode, u64), Vec<TreeNode>>,
    /// Current position to write in f_bootstrap
    pub(crate) offset: u64,
    /// Writer for the bootstrap artifact (file-backed or in-memory).
    pub(crate) writer: Box<dyn RafsIoWrite>,
    /// Not fully used blocks, bucketed by how many free inode slots they have
    /// left (see `allocate_available_block`/`append_available_block`).
    pub(crate) v6_available_blocks: Vec<VecDeque<u64>>,

    // Next inode number to hand out; consumed via `generate_next_ino()`.
    next_ino: Inode,
}
1238
1239impl BootstrapContext {
1240    /// Create a new instance of [BootstrapContext].
1241    pub fn new(storage: Option<ArtifactStorage>, layered: bool) -> Result<Self> {
1242        let writer = if let Some(storage) = storage {
1243            Box::new(ArtifactFileWriter(ArtifactWriter::new(storage)?)) as Box<dyn RafsIoWrite>
1244        } else {
1245            Box::<ArtifactMemoryWriter>::default() as Box<dyn RafsIoWrite>
1246        };
1247
1248        Ok(Self {
1249            layered,
1250            inode_map: HashMap::new(),
1251            next_ino: 1,
1252            offset: EROFS_BLOCK_SIZE_4096,
1253            writer,
1254            v6_available_blocks: vec![
1255                VecDeque::new();
1256                EROFS_BLOCK_SIZE_4096 as usize / EROFS_INODE_SLOT_SIZE
1257            ],
1258        })
1259    }
1260
1261    /// Align the write position.
1262    pub fn align_offset(&mut self, align_size: u64) {
1263        if self.offset % align_size > 0 {
1264            self.offset = div_round_up(self.offset, align_size) * align_size;
1265        }
1266    }
1267
    /// Get the next available inode number.
    ///
    /// Read-only peek; use `generate_next_ino()` to actually consume it.
    pub(crate) fn get_next_ino(&self) -> Inode {
        self.next_ino
    }
1272
1273    /// Generate next inode number.
1274    pub(crate) fn generate_next_ino(&mut self) -> Inode {
1275        let ino = self.next_ino;
1276        self.next_ino += 1;
1277        ino
1278    }
1279
    // Only used to allocate space for metadata(inode / inode + inline data).
    // Try to find a used block with no less than `size` space left.
    // If found it, return the offset where we can store data.
    // If not, return 0.
    pub(crate) fn allocate_available_block(&mut self, size: u64, block_size: u64) -> u64 {
        // Whole-block (or larger) requests never come from the free list.
        if size >= block_size {
            return 0;
        }

        // Bucket `i` holds blocks with exactly `i` free inode slots; search
        // every bucket that has at least enough slots for `size`.
        let min_idx = div_round_up(size, EROFS_INODE_SLOT_SIZE as u64) as usize;
        let max_idx = div_round_up(block_size, EROFS_INODE_SLOT_SIZE as u64) as usize;

        for idx in min_idx..max_idx {
            let blocks = &mut self.v6_available_blocks[idx];
            if let Some(mut offset) = blocks.pop_front() {
                // The deque stores block-aligned base offsets; the free
                // region starts `idx` slots before the end of the block.
                offset += block_size - (idx * EROFS_INODE_SLOT_SIZE) as u64;
                // Return the unused remainder (if any) to the proper bucket.
                self.append_available_block(
                    offset + (min_idx * EROFS_INODE_SLOT_SIZE) as u64,
                    block_size,
                );
                return offset;
            }
        }

        0
    }
1306
    // Append the block that `offset` belongs to corresponding deque.
    //
    // A block-aligned `offset` means the block has no free tail, so nothing
    // is recorded. Otherwise `block_size - offset % block_size` bytes remain
    // free, and the block's aligned base offset is pushed onto the bucket
    // indexed by the number of free inode slots.
    pub(crate) fn append_available_block(&mut self, offset: u64, block_size: u64) {
        if offset % block_size != 0 {
            let avail = block_size - offset % block_size;
            let idx = avail as usize / EROFS_INODE_SLOT_SIZE;
            self.v6_available_blocks[idx].push_back(round_down(offset, block_size));
        }
    }
1315}
1316
/// BootstrapManager is used to hold the parent bootstrap reader and create new bootstrap context.
#[derive(Clone)]
pub struct BootstrapManager {
    /// Path of the parent bootstrap, present only for layered builds.
    pub(crate) f_parent_path: Option<PathBuf>,
    /// Where to persist generated bootstraps; `None` keeps them in memory.
    pub(crate) bootstrap_storage: Option<ArtifactStorage>,
}
1323
1324impl BootstrapManager {
1325    /// Create a new instance of [BootstrapManager]
1326    pub fn new(bootstrap_storage: Option<ArtifactStorage>, f_parent_path: Option<String>) -> Self {
1327        Self {
1328            f_parent_path: f_parent_path.map(PathBuf::from),
1329            bootstrap_storage,
1330        }
1331    }
1332
    /// Create a new instance of [BootstrapContext]
    ///
    /// The context is layered iff a parent bootstrap path was supplied.
    pub fn create_ctx(&self) -> Result<BootstrapContext> {
        BootstrapContext::new(self.bootstrap_storage.clone(), self.f_parent_path.is_some())
    }
1337}
1338
/// Context information shared across one image build.
pub struct BuildContext {
    /// Blob id (user specified or sha256(blob)).
    pub blob_id: String,

    /// When filling local blobcache file, chunks are arranged as per the
    /// `decompress_offset` within chunk info. Therefore, provide a new flag
    /// to image tool thus to align chunks in blob with 4k size.
    pub aligned_chunk: bool,
    /// Offset to add for the compressed blob.
    pub blob_offset: u64,
    /// Blob chunk compress flag.
    pub compressor: compress::Algorithm,
    /// Inode and chunk digest algorithm flag.
    pub digester: digest::Algorithm,
    /// Blob encryption algorithm flag.
    pub cipher: crypt::Algorithm,
    /// CRC32 algorithm flag.
    pub crc32_algorithm: crc32::Algorithm,
    /// Save host uid gid in each inode.
    pub explicit_uidgid: bool,
    /// whiteout spec: overlayfs or oci
    pub whiteout_spec: WhiteoutSpec,
    /// Chunk slice size.
    pub chunk_size: u32,
    /// Batch chunk data size.
    pub batch_size: u32,
    /// Version number of output metadata and data blob.
    pub fs_version: RafsVersion,
    /// Whether any directory/file has extended attributes.
    pub has_xattr: bool,

    /// Format conversion type.
    pub conversion_type: ConversionType,
    /// Path of source to build the image from:
    /// - Directory: `source_path` should be a directory path
    /// - StargzIndex: `source_path` should be a stargz index json file path
    pub source_path: PathBuf,

    /// Track file/chunk prefetch state.
    pub prefetch: Prefetch,

    /// Storage writing blob to single file or a directory.
    pub blob_storage: Option<ArtifactStorage>,
    /// Storage target for blobs kept in an external backend, if any.
    pub external_blob_storage: Option<ArtifactStorage>,
    /// ZRan context generator, if the conversion needs one.
    pub blob_zran_generator: Option<Mutex<ZranContextGenerator<File>>>,
    /// Batch chunk context generator, if batching is enabled.
    pub blob_batch_generator: Option<Mutex<BatchContextGenerator>>,
    /// Buffered reader over a source tarball, if the conversion needs one.
    pub blob_tar_reader: Option<BufReaderInfo<File>>,
    /// Feature bits applied to newly created blobs.
    pub blob_features: BlobFeatures,
    /// Whether filesystem metadata is inlined into the data blob.
    pub blob_inline_meta: bool,

    /// Optional features enabled for this build.
    pub features: Features,
    /// Global configuration, e.g. for storage backends.
    pub configuration: Arc<ConfigV2>,
    /// Generate the blob cache and blob meta
    pub blob_cache_generator: Option<BlobCacheGenerator>,

    /// Whether is chunkdict.
    pub is_chunkdict_generated: bool,
    /// Nydus attributes for different build behavior.
    pub attributes: Attributes,
}
1398
1399impl BuildContext {
1400    #[allow(clippy::too_many_arguments)]
1401    pub fn new(
1402        blob_id: String,
1403        aligned_chunk: bool,
1404        blob_offset: u64,
1405        compressor: compress::Algorithm,
1406        digester: digest::Algorithm,
1407        explicit_uidgid: bool,
1408        whiteout_spec: WhiteoutSpec,
1409        conversion_type: ConversionType,
1410        source_path: PathBuf,
1411        prefetch: Prefetch,
1412        blob_storage: Option<ArtifactStorage>,
1413        external_blob_storage: Option<ArtifactStorage>,
1414        blob_inline_meta: bool,
1415        features: Features,
1416        encrypt: bool,
1417        attributes: Attributes,
1418    ) -> Self {
1419        // It's a flag for images built with new nydus-image 2.2 and newer.
1420        let mut blob_features = BlobFeatures::CAP_TAR_TOC;
1421        if blob_inline_meta {
1422            blob_features |= BlobFeatures::INLINED_FS_META;
1423            blob_features |= BlobFeatures::HAS_TAR_HEADER;
1424        };
1425        if features.is_enabled(Feature::BlobToc) {
1426            blob_features |= BlobFeatures::HAS_TOC;
1427            blob_features |= BlobFeatures::HAS_TAR_HEADER;
1428        }
1429        if conversion_type == ConversionType::TarToTarfs {
1430            blob_features |= BlobFeatures::TARFS;
1431        }
1432
1433        let cipher = if encrypt {
1434            crypt::Algorithm::Aes128Xts
1435        } else {
1436            crypt::Algorithm::None
1437        };
1438
1439        let crc32_algorithm = crc32::Algorithm::Crc32Iscsi;
1440        BuildContext {
1441            blob_id,
1442            aligned_chunk,
1443            blob_offset,
1444            compressor,
1445            digester,
1446            cipher,
1447            crc32_algorithm,
1448            explicit_uidgid,
1449            whiteout_spec,
1450
1451            chunk_size: RAFS_DEFAULT_CHUNK_SIZE as u32,
1452            batch_size: 0,
1453            fs_version: RafsVersion::default(),
1454
1455            conversion_type,
1456            source_path,
1457
1458            prefetch,
1459            blob_storage,
1460            external_blob_storage,
1461            blob_zran_generator: None,
1462            blob_batch_generator: None,
1463            blob_tar_reader: None,
1464            blob_features,
1465            blob_inline_meta,
1466            has_xattr: false,
1467
1468            features,
1469            configuration: Arc::new(ConfigV2::default()),
1470            blob_cache_generator: None,
1471            is_chunkdict_generated: false,
1472
1473            attributes,
1474        }
1475    }
1476
    /// Set the RAFS version of the output metadata and data blob.
    pub fn set_fs_version(&mut self, fs_version: RafsVersion) {
        self.fs_version = fs_version;
    }
1480
    /// Set the chunk slice size.
    pub fn set_chunk_size(&mut self, chunk_size: u32) {
        self.chunk_size = chunk_size;
    }
1484
    /// Set the batch chunk data size.
    pub fn set_batch_size(&mut self, batch_size: u32) {
        self.batch_size = batch_size;
    }
1488
    /// Replace the global configuration used for the build.
    pub fn set_configuration(&mut self, config: Arc<ConfigV2>) {
        self.configuration = config;
    }
1492
    /// Mark whether this build generates a chunk dictionary.
    pub fn set_is_chunkdict(&mut self, is_chunkdict: bool) {
        self.is_chunkdict_generated = is_chunkdict;
    }
1496}
1497
impl Default for BuildContext {
    fn default() -> Self {
        Self {
            blob_id: String::new(),
            aligned_chunk: false,
            blob_offset: 0,
            compressor: compress::Algorithm::default(),
            digester: digest::Algorithm::default(),
            cipher: crypt::Algorithm::None,
            crc32_algorithm: crc32::Algorithm::default(),
            explicit_uidgid: true,
            whiteout_spec: WhiteoutSpec::default(),

            chunk_size: RAFS_DEFAULT_CHUNK_SIZE as u32,
            batch_size: 0,
            fs_version: RafsVersion::default(),

            conversion_type: ConversionType::default(),
            source_path: PathBuf::new(),

            prefetch: Prefetch::default(),
            blob_storage: None,
            external_blob_storage: None,
            blob_zran_generator: None,
            blob_batch_generator: None,
            blob_tar_reader: None,
            blob_features: BlobFeatures::empty(),
            // NOTE(review): defaults to `true` here while `BuildContext::new()`
            // initializes `has_xattr` to `false` — confirm the discrepancy is
            // intentional.
            has_xattr: true,
            blob_inline_meta: false,
            features: Features::new(),
            configuration: Arc::new(ConfigV2::default()),
            blob_cache_generator: None,
            is_chunkdict_generated: false,

            attributes: Attributes::default(),
        }
    }
}
1536
/// BuildOutput represents the output in this build.
#[derive(Default, Debug, Clone)]
pub struct BuildOutput {
    /// Blob ids in the blob table of bootstrap.
    pub blobs: Vec<String>,
    /// The size of output blob in this build; `None` when no blob exists.
    pub blob_size: Option<u64>,
    /// External blob ids in the blob table of external bootstrap.
    pub external_blobs: Vec<String>,
    /// File path for the metadata blob.
    pub bootstrap_path: Option<String>,
    /// File path for the external metadata blob.
    pub external_bootstrap_path: Option<String>,
}
1551
1552impl fmt::Display for BuildOutput {
1553    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1554        writeln!(
1555            f,
1556            "meta blob path: {}",
1557            self.bootstrap_path.as_deref().unwrap_or("<none>")
1558        )?;
1559        writeln!(
1560            f,
1561            "data blob size: 0x{:x}",
1562            self.blob_size.unwrap_or_default()
1563        )?;
1564        if self.external_blobs.is_empty() {
1565            write!(f, "data blobs: {:?}", self.blobs)?;
1566        } else {
1567            writeln!(f, "data blobs: {:?}", self.blobs)?;
1568            writeln!(
1569                f,
1570                "external meta blob path: {}",
1571                self.external_bootstrap_path.as_deref().unwrap_or("<none>")
1572            )?;
1573            write!(f, "external data blobs: {:?}", self.external_blobs)?;
1574        }
1575        Ok(())
1576    }
1577}
1578
1579impl BuildOutput {
1580    /// Create a new instance of [BuildOutput].
1581    pub fn new(
1582        blob_mgr: &BlobManager,
1583        external_blob_mgr: Option<&BlobManager>,
1584        bootstrap_storage: &Option<ArtifactStorage>,
1585        external_bootstrap_storage: &Option<ArtifactStorage>,
1586    ) -> Result<BuildOutput> {
1587        let blobs = blob_mgr.get_blob_ids();
1588        let blob_size = blob_mgr.get_last_blob().map(|b| b.compressed_blob_size);
1589        let bootstrap_path = bootstrap_storage
1590            .as_ref()
1591            .map(|stor| stor.display().to_string());
1592        let external_bootstrap_path = external_bootstrap_storage
1593            .as_ref()
1594            .map(|stor| stor.display().to_string());
1595        let external_blobs = external_blob_mgr
1596            .map(|mgr| mgr.get_blob_ids())
1597            .unwrap_or_default();
1598
1599        Ok(Self {
1600            blobs,
1601            external_blobs,
1602            blob_size,
1603            bootstrap_path,
1604            external_bootstrap_path,
1605        })
1606    }
1607}
1608
1609#[cfg(test)]
1610mod tests {
1611    use std::sync::atomic::AtomicBool;
1612
1613    use nydus_api::{BackendConfigV2, ConfigV2Internal, LocalFsConfig};
1614
1615    use super::*;
1616
    #[test]
    fn test_blob_context_from() {
        // Blob with inlined fs meta, separate data blob, and a TOC.
        let mut blob = BlobInfo::new(
            1,
            "blob_id".to_string(),
            16,
            8,
            4,
            2,
            BlobFeatures::INLINED_FS_META | BlobFeatures::SEPARATE | BlobFeatures::HAS_TOC,
        );
        // Use a fixture blob shipped with the repository as the meta source.
        let root_dir = &std::env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR");
        let mut source_path = PathBuf::from(root_dir);
        source_path.push("../tests/texture/blobs/be7d77eeb719f70884758d1aa800ed0fb09d701aaec469964e9d54325f0d5fef");
        assert!(blob
            .set_blob_id_from_meta_path(source_path.as_path())
            .is_ok());
        blob.set_blob_meta_size(2);
        blob.set_blob_toc_size(2);
        blob.set_blob_meta_digest([32u8; 32]);
        blob.set_blob_toc_digest([64u8; 32]);
        blob.set_blob_meta_info(1, 2, 4, 8);

        // Build context whose localfs backend can access the blob directly.
        let mut ctx = BuildContext::default();
        ctx.configuration.internal.set_blob_accessible(true);
        let config = ConfigV2 {
            version: 2,
            backend: Some(BackendConfigV2 {
                backend_type: "localfs".to_owned(),
                localdisk: None,
                localfs: Some(LocalFsConfig {
                    blob_file: source_path.to_str().unwrap().to_owned(),
                    dir: "/tmp".to_owned(),
                    alt_dirs: vec!["/var/nydus/cache".to_owned()],
                }),
                oss: None,
                s3: None,
                registry: None,
                http_proxy: None,
            }),
            external_backends: Vec::new(),
            id: "id".to_owned(),
            cache: None,
            rafs: None,
            overlay: None,
            internal: ConfigV2Internal {
                blob_accessible: Arc::new(AtomicBool::new(true)),
            },
        };
        ctx.set_configuration(config.into());

        let chunk_source = ChunkSource::Dict;

        let blob_ctx = BlobContext::from(&ctx, &blob, chunk_source);

        // The resulting context must mirror the BlobInfo sizes and enable
        // blob meta generation.
        assert!(blob_ctx.is_ok());
        let blob_ctx = blob_ctx.unwrap();
        assert_eq!(blob_ctx.uncompressed_blob_size, 16);
        assert!(blob_ctx.blob_meta_info_enabled);
    }
1677}