Skip to main content

microsandbox_utils/index/
writer.rs

//! Sidecar index writer.
//!
//! Builds binary index files from high-level calls. Used by OCI layer extraction
//! to generate per-layer sidecar indexes, and by tests to construct valid or
//! corrupt index data.
6
7use std::{io, path::Path};
8
9use super::{
10    DIR_FLAG_OPAQUE, DIR_RECORD_IDX_NONE, DirRecord, ENTRY_FLAG_WHITEOUT, EntryRecord, HardlinkRef,
11    INDEX_MAGIC, INDEX_VERSION, IndexHeader,
12};
13
14//--------------------------------------------------------------------------------------------------
15// Constants
16//--------------------------------------------------------------------------------------------------
17
18#[cfg(target_os = "linux")]
19const S_IFREG_MODE: u32 = libc::S_IFREG;
20#[cfg(target_os = "macos")]
21const S_IFREG_MODE: u32 = libc::S_IFREG as u32;
22
23#[cfg(target_os = "linux")]
24const S_IFDIR_MODE: u32 = libc::S_IFDIR;
25#[cfg(target_os = "macos")]
26const S_IFDIR_MODE: u32 = libc::S_IFDIR as u32;
27
28#[cfg(target_os = "linux")]
29const S_IFLNK_MODE: u32 = libc::S_IFLNK;
30#[cfg(target_os = "macos")]
31const S_IFLNK_MODE: u32 = libc::S_IFLNK as u32;
32
33#[cfg(target_os = "linux")]
34const S_IFMT_MODE: u32 = libc::S_IFMT;
35#[cfg(target_os = "macos")]
36const S_IFMT_MODE: u32 = libc::S_IFMT as u32;
37
38//--------------------------------------------------------------------------------------------------
39// Types
40//--------------------------------------------------------------------------------------------------
41
/// Specification for a directory in the index.
#[derive(Debug, Clone)]
struct DirBuildSpec {
    /// Directory path (`""` for root, `"etc"`, `"usr/bin"`).
    path: String,
    /// Flags (bit 0 = opaque).
    flags: u8,
    /// Child entries.
    entries: Vec<EntryBuildSpec>,
    /// Overflow tombstone names.
    tombstones: Vec<String>,
}

/// Specification for an entry in the index.
#[derive(Debug, Clone)]
struct EntryBuildSpec {
    /// Entry name (basename only, e.g. `"hosts"`).
    name: String,
    /// Host inode number.
    host_ino: u64,
    /// File size in bytes.
    size: u64,
    /// Full stat mode including `S_IFMT` type bits.
    mode: u32,
    /// Guest-visible uid.
    uid: u32,
    /// Guest-visible gid.
    gid: u32,
    /// Flags (bit 0 = whiteout).
    flags: u8,
}

/// Builds valid sidecar index files from high-level calls.
///
/// Directories must be registered before entries are added to them. The builder is
/// `Clone`, so a partially populated builder can be reused as a template, and `Debug`,
/// so its pending contents can be inspected in test failures.
///
/// # Example
/// ```ignore
/// let data = IndexBuilder::new()
///     .dir("")
///     .file("", "hello.txt", 0o644)
///     .build();
/// ```
#[derive(Debug, Clone)]
pub struct IndexBuilder {
    /// Directories registered so far, in insertion order.
    dirs: Vec<DirBuildSpec>,
    /// Hardlink references as `(inode, path)` pairs.
    hardlinks: Vec<(u64, String)>,
    /// Synthetic host_ino counter for convenience methods.
    /// Not used by lookup (lookup stats the real file).
    next_ino: u64,
}
88
89//--------------------------------------------------------------------------------------------------
90// Methods
91//--------------------------------------------------------------------------------------------------
92
93impl IndexBuilder {
94    /// Create a new empty builder.
95    pub fn new() -> Self {
96        Self {
97            dirs: Vec::new(),
98            hardlinks: Vec::new(),
99            next_ino: 1000,
100        }
101    }
102
103    /// Add a directory to the index.
104    pub fn dir(mut self, path: &str) -> Self {
105        self.dirs.push(DirBuildSpec {
106            path: path.to_string(),
107            flags: 0,
108            entries: Vec::new(),
109            tombstones: Vec::new(),
110        });
111        self
112    }
113
114    /// Add an opaque directory to the index.
115    pub fn opaque_dir(mut self, path: &str) -> Self {
116        self.dirs.push(DirBuildSpec {
117            path: path.to_string(),
118            flags: DIR_FLAG_OPAQUE,
119            entries: Vec::new(),
120            tombstones: Vec::new(),
121        });
122        self
123    }
124
125    /// Add a regular file entry. Mode permissions only (S_IFREG is added automatically).
126    pub fn file(mut self, dir: &str, name: &str, mode: u32) -> Self {
127        let ino = self.next_ino;
128        self.next_ino += 1;
129        let dir_spec = self
130            .dirs
131            .iter_mut()
132            .find(|d| d.path == dir)
133            .unwrap_or_else(|| panic!("dir '{}' not found, add with .dir() first", dir));
134        dir_spec.entries.push(EntryBuildSpec {
135            name: name.to_string(),
136            host_ino: ino,
137            size: 0,
138            mode: S_IFREG_MODE | (mode & 0o7777),
139            uid: 0,
140            gid: 0,
141            flags: 0,
142        });
143        self
144    }
145
146    /// Add a subdirectory entry. Mode permissions only (S_IFDIR is added automatically).
147    pub fn subdir(mut self, dir: &str, name: &str, mode: u32) -> Self {
148        let ino = self.next_ino;
149        self.next_ino += 1;
150        let dir_spec = self
151            .dirs
152            .iter_mut()
153            .find(|d| d.path == dir)
154            .unwrap_or_else(|| panic!("dir '{}' not found", dir));
155        dir_spec.entries.push(EntryBuildSpec {
156            name: name.to_string(),
157            host_ino: ino,
158            size: 0,
159            mode: S_IFDIR_MODE | (mode & 0o7777),
160            uid: 0,
161            gid: 0,
162            flags: 0,
163        });
164        self
165    }
166
167    /// Add a symlink entry.
168    pub fn symlink(mut self, dir: &str, name: &str) -> Self {
169        let ino = self.next_ino;
170        self.next_ino += 1;
171        let dir_spec = self.dirs.iter_mut().find(|d| d.path == dir).unwrap();
172        dir_spec.entries.push(EntryBuildSpec {
173            name: name.to_string(),
174            host_ino: ino,
175            size: 0,
176            mode: S_IFLNK_MODE | 0o777,
177            uid: 0,
178            gid: 0,
179            flags: 0,
180        });
181        self
182    }
183
184    /// Add a whiteout entry (masks a name from lower layers).
185    pub fn whiteout(mut self, dir: &str, name: &str) -> Self {
186        let dir_spec = self.dirs.iter_mut().find(|d| d.path == dir).unwrap();
187        dir_spec.entries.push(EntryBuildSpec {
188            name: name.to_string(),
189            host_ino: 0,
190            size: 0,
191            mode: S_IFREG_MODE,
192            uid: 0,
193            gid: 0,
194            flags: ENTRY_FLAG_WHITEOUT,
195        });
196        self
197    }
198
199    /// Add an overflow tombstone name for a directory.
200    pub fn tombstone(mut self, dir: &str, name: &str) -> Self {
201        let dir_spec = self.dirs.iter_mut().find(|d| d.path == dir).unwrap();
202        dir_spec.tombstones.push(name.to_string());
203        self
204    }
205
206    /// Add a hardlink reference entry.
207    pub fn hardlink(mut self, ino: u64, path: &str) -> Self {
208        self.hardlinks.push((ino, path.to_string()));
209        self
210    }
211
212    /// Build valid index bytes.
213    pub fn build(mut self) -> Vec<u8> {
214        // Sort dirs by path (lexicographic byte comparison).
215        self.dirs
216            .sort_by(|a, b| a.path.as_bytes().cmp(b.path.as_bytes()));
217
218        // Sort entries within each dir by name.
219        for dir in &mut self.dirs {
220            dir.entries
221                .sort_by(|a, b| a.name.as_bytes().cmp(b.name.as_bytes()));
222        }
223
224        // Sort hardlinks by ino, then path.
225        self.hardlinks
226            .sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
227
228        // Build string pool.
229        let mut pool = Vec::new();
230
231        // Dir paths.
232        let dir_path_offsets: Vec<(u32, u16)> = self
233            .dirs
234            .iter()
235            .map(|d| {
236                let off = pool.len() as u32;
237                let len = d.path.len() as u16;
238                pool.extend_from_slice(d.path.as_bytes());
239                (off, len)
240            })
241            .collect();
242
243        // Entry names.
244        let entry_name_offsets: Vec<Vec<(u32, u16)>> = self
245            .dirs
246            .iter()
247            .map(|d| {
248                d.entries
249                    .iter()
250                    .map(|e| {
251                        let off = pool.len() as u32;
252                        let len = e.name.len() as u16;
253                        pool.extend_from_slice(e.name.as_bytes());
254                        (off, len)
255                    })
256                    .collect()
257            })
258            .collect();
259
260        // Hardlink paths.
261        let hardlink_offsets: Vec<(u32, u32)> = self
262            .hardlinks
263            .iter()
264            .map(|(_, path)| {
265                let off = pool.len() as u32;
266                let len = path.len() as u32;
267                pool.extend_from_slice(path.as_bytes());
268                (off, len)
269            })
270            .collect();
271
272        // Tombstone data.
273        let tombstone_offsets: Vec<(u32, u16)> = self
274            .dirs
275            .iter()
276            .map(|d| {
277                if d.tombstones.is_empty() {
278                    (0, 0)
279                } else {
280                    let off = pool.len() as u32;
281                    for name in &d.tombstones {
282                        let len = name.len() as u16;
283                        pool.extend_from_slice(&len.to_le_bytes());
284                        pool.extend_from_slice(name.as_bytes());
285                    }
286                    (off, d.tombstones.len() as u16)
287                }
288            })
289            .collect();
290
291        // Sorted dir paths for dir_record_idx resolution.
292        let sorted_paths: Vec<&str> = self.dirs.iter().map(|d| d.path.as_str()).collect();
293
294        // Compute first_entry offsets.
295        let mut first_entries: Vec<u32> = Vec::new();
296        let mut offset = 0u32;
297        for dir in &self.dirs {
298            first_entries.push(offset);
299            offset += dir.entries.len() as u32;
300        }
301
302        let dir_count = self.dirs.len() as u32;
303        let entry_count: u32 = self.dirs.iter().map(|d| d.entries.len() as u32).sum();
304        let hardlink_ref_count = self.hardlinks.len() as u32;
305        let string_pool_size = pool.len() as u32;
306
307        let mut buf = Vec::with_capacity(
308            size_of::<IndexHeader>()
309                + dir_count as usize * size_of::<DirRecord>()
310                + entry_count as usize * size_of::<EntryRecord>()
311                + hardlink_ref_count as usize * size_of::<HardlinkRef>()
312                + string_pool_size as usize,
313        );
314
315        // Header (32 bytes).
316        buf.extend_from_slice(&INDEX_MAGIC.to_le_bytes());
317        buf.extend_from_slice(&INDEX_VERSION.to_le_bytes());
318        buf.extend_from_slice(&0u32.to_le_bytes()); // flags
319        buf.extend_from_slice(&dir_count.to_le_bytes());
320        buf.extend_from_slice(&entry_count.to_le_bytes());
321        buf.extend_from_slice(&hardlink_ref_count.to_le_bytes());
322        buf.extend_from_slice(&string_pool_size.to_le_bytes());
323        buf.extend_from_slice(&0u32.to_le_bytes()); // checksum placeholder
324        assert_eq!(buf.len(), 32);
325
326        // DirRecords (24 bytes each).
327        for (i, dir) in self.dirs.iter().enumerate() {
328            let (path_off, path_len) = dir_path_offsets[i];
329            let (tomb_off, tomb_count) = tombstone_offsets[i];
330            buf.extend_from_slice(&path_off.to_le_bytes());
331            buf.extend_from_slice(&path_len.to_le_bytes());
332            buf.push(dir.flags);
333            buf.push(0); // _pad
334            buf.extend_from_slice(&first_entries[i].to_le_bytes());
335            buf.extend_from_slice(&(dir.entries.len() as u32).to_le_bytes());
336            buf.extend_from_slice(&tomb_off.to_le_bytes());
337            buf.extend_from_slice(&tomb_count.to_le_bytes());
338            buf.extend_from_slice(&0u16.to_le_bytes()); // _pad2
339        }
340
341        // EntryRecords (40 bytes each).
342        for (dir_idx, dir) in self.dirs.iter().enumerate() {
343            for (entry_idx, entry) in dir.entries.iter().enumerate() {
344                let (name_off, name_len) = entry_name_offsets[dir_idx][entry_idx];
345
346                // Auto-compute dir_record_idx for directory entries.
347                let dir_record_idx = if entry.mode & S_IFMT_MODE == S_IFDIR_MODE {
348                    let child_path = if dir.path.is_empty() {
349                        entry.name.clone()
350                    } else {
351                        format!("{}/{}", dir.path, entry.name)
352                    };
353                    sorted_paths
354                        .binary_search(&child_path.as_str())
355                        .map(|i| i as u32)
356                        .unwrap_or(DIR_RECORD_IDX_NONE)
357                } else {
358                    DIR_RECORD_IDX_NONE
359                };
360
361                buf.extend_from_slice(&entry.host_ino.to_le_bytes());
362                buf.extend_from_slice(&entry.size.to_le_bytes());
363                buf.extend_from_slice(&name_off.to_le_bytes());
364                buf.extend_from_slice(&entry.mode.to_le_bytes());
365                buf.extend_from_slice(&entry.uid.to_le_bytes());
366                buf.extend_from_slice(&entry.gid.to_le_bytes());
367                buf.extend_from_slice(&name_len.to_le_bytes());
368                buf.push(entry.flags);
369                buf.push(0); // _pad
370                buf.extend_from_slice(&dir_record_idx.to_le_bytes());
371            }
372        }
373
374        // HardlinkRefs (16 bytes each).
375        for (i, (ino, _)) in self.hardlinks.iter().enumerate() {
376            let (path_off, path_len) = hardlink_offsets[i];
377            buf.extend_from_slice(&ino.to_le_bytes());
378            buf.extend_from_slice(&path_off.to_le_bytes());
379            buf.extend_from_slice(&path_len.to_le_bytes());
380        }
381
382        // String pool.
383        buf.append(&mut pool);
384
385        // Compute CRC32C incrementally (checksum field at bytes 28..32 treated as zeroed).
386        let crc = crc32c::crc32c(&buf[..28]);
387        let crc = crc32c::crc32c_append(crc, &[0u8; 4]);
388        let crc = crc32c::crc32c_append(crc, &buf[32..]);
389        buf[28..32].copy_from_slice(&crc.to_le_bytes());
390
391        buf
392    }
393
394    /// Write valid index to a file.
395    pub fn build_to_file(self, path: &Path) -> io::Result<()> {
396        std::fs::write(path, self.build())
397    }
398}
399
400//--------------------------------------------------------------------------------------------------
401// Trait Implementations
402//--------------------------------------------------------------------------------------------------
403
404impl Default for IndexBuilder {
405    fn default() -> Self {
406        Self::new()
407    }
408}