Skip to main content

microsandbox_image/erofs/
reader.rs

//! Minimal EROFS reader for extracting file contents from our own images.
//!
//! Only supports the subset of EROFS that our writer produces:
//! - Extended inodes (64 bytes)
//! - Uncompressed data (FLAT_PLAIN or FLAT_INLINE)
//! - Sorted directory entries (binary search)
//! - No shared xattrs, no compression, no chunks
8
9use std::os::unix::fs::FileExt;
10use std::path::Path;
11use std::{fs::File, io};
12
13use super::format::{
14    EROFS_BLKSIZ, EROFS_DIRENT_SIZE, EROFS_INODE_EXTENDED_SIZE, EROFS_INODE_FLAT_INLINE,
15    EROFS_INODE_FLAT_PLAIN, EROFS_NULL_ADDR, EROFS_SUPER_OFFSET, EROFS_XATTR_IBODY_HEADER_SIZE,
16    EROFS_XATTR_INDEX_SECURITY, EROFS_XATTR_INDEX_TRUSTED, EROFS_XATTR_INDEX_USER, S_IFBLK,
17    S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, erofs_xattr_align,
18};
19
20//--------------------------------------------------------------------------------------------------
21// Types
22//--------------------------------------------------------------------------------------------------
23
/// A handle to an open EROFS image for reading.
///
/// Created by [`ErofsReader::new`], which parses the superblock once and
/// caches the two values every later lookup needs.
#[derive(Debug)]
pub struct ErofsReader {
    /// The image file; all reads are positional, so the cursor is never used.
    file: File,
    /// Block address where inode metadata starts (superblock `meta_blkaddr`).
    meta_blkaddr: u32,
    /// NID of the root directory inode (superblock `root_nid`, widened from `u16`).
    root_nid: u32,
}
30
/// The file type of an entry, derived from the type bits of its inode mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ErofsEntryKind {
    /// A regular file (`S_IFREG`).
    RegularFile,
    /// A directory (`S_IFDIR`).
    Directory,
    /// A symbolic link (`S_IFLNK`).
    Symlink,
    /// A character device (`S_IFCHR`).
    CharDevice,
    /// A block device (`S_IFBLK`).
    BlockDevice,
    /// A named pipe (`S_IFIFO`).
    Fifo,
    /// A Unix domain socket (`S_IFSOCK`).
    Socket,
}
41
42#[derive(Debug, Clone, PartialEq, Eq)]
43pub struct ErofsEntryInfo {
44    pub kind: ErofsEntryKind,
45    pub opaque: bool,
46    pub whiteout: bool,
47}
48
49//--------------------------------------------------------------------------------------------------
50// Methods
51//--------------------------------------------------------------------------------------------------
52
53impl ErofsReader {
54    /// Open an EROFS image by parsing the superblock.
55    pub fn new(file: File) -> io::Result<Self> {
56        let mut sb = [0u8; 128];
57        read_exact_at(&file, EROFS_SUPER_OFFSET, &mut sb)?;
58
59        let magic = u32::from_le_bytes([sb[0], sb[1], sb[2], sb[3]]);
60        if magic != 0xE0F5_E1E2 {
61            return Err(io::Error::new(
62                io::ErrorKind::InvalidData,
63                format!("bad EROFS magic: {magic:#x}"),
64            ));
65        }
66
67        let root_nid = u16::from_le_bytes([sb[0x0E], sb[0x0F]]) as u32;
68        let meta_blkaddr = u32::from_le_bytes([sb[0x28], sb[0x29], sb[0x2A], sb[0x2B]]);
69
70        Ok(Self {
71            file,
72            meta_blkaddr,
73            root_nid,
74        })
75    }
76
77    /// Read a file by path from the EROFS image. Returns the file data.
78    pub fn read_file(&mut self, path: &str) -> io::Result<Vec<u8>> {
79        let target_inode = self.lookup_path(path)?;
80        if (target_inode.mode & S_IFMT) != S_IFREG {
81            return Err(io::Error::new(
82                io::ErrorKind::InvalidInput,
83                "target is not a regular file",
84            ));
85        }
86        self.read_inode_data(&target_inode)
87    }
88
89    /// Read a symlink target by path from the EROFS image.
90    pub fn read_link(&mut self, path: &str) -> io::Result<Vec<u8>> {
91        let target_inode = self.lookup_path(path)?;
92        if (target_inode.mode & S_IFMT) != S_IFLNK {
93            return Err(io::Error::new(
94                io::ErrorKind::InvalidInput,
95                "target is not a symlink",
96            ));
97        }
98        self.read_inode_data(&target_inode)
99    }
100
101    pub fn entry_info(&mut self, path: &str) -> io::Result<ErofsEntryInfo> {
102        let inode = self.lookup_path(path)?;
103        let kind = inode_kind(&inode)?;
104        let opaque = if kind == ErofsEntryKind::Directory {
105            self.inode_is_opaque(&inode)?
106        } else {
107            false
108        };
109        let whiteout = kind == ErofsEntryKind::CharDevice && inode.rdev == 0;
110
111        Ok(ErofsEntryInfo {
112            kind,
113            opaque,
114            whiteout,
115        })
116    }
117
118    fn inode_offset(&self, nid: u32) -> u64 {
119        (self.meta_blkaddr as u64) * (EROFS_BLKSIZ as u64) + (nid as u64) * 32
120    }
121
122    fn read_inode(&mut self, nid: u32) -> io::Result<InodeInfo> {
123        let offset = self.inode_offset(nid);
124
125        let mut buf = [0u8; EROFS_INODE_EXTENDED_SIZE as usize];
126        read_exact_at(&self.file, offset, &mut buf)?;
127
128        let i_format = u16::from_le_bytes([buf[0], buf[1]]);
129        let i_xattr_icount = u16::from_le_bytes([buf[2], buf[3]]);
130        let mode = u16::from_le_bytes([buf[4], buf[5]]);
131        let size = u64::from_le_bytes([
132            buf[8], buf[9], buf[10], buf[11], buf[12], buf[13], buf[14], buf[15],
133        ]);
134        let i_u = u32::from_le_bytes([buf[16], buf[17], buf[18], buf[19]]);
135
136        let data_layout = ((i_format >> 1) & 0x07) as u8;
137
138        // Compute xattr ibody size to know where inline data starts.
139        // Formula from EROFS spec: ibody = 12-byte header + (i_xattr_icount - 1) * 4 bytes.
140        // The "- 1" accounts for the header occupying the first count unit.
141        let xattr_ibody_size = if i_xattr_icount == 0 {
142            0u32
143        } else {
144            12 + ((i_xattr_icount as u32) - 1) * 4
145        };
146
147        Ok(InodeInfo {
148            nid,
149            mode,
150            size,
151            data_layout,
152            startblk_lo: i_u,
153            rdev: i_u,
154            xattr_ibody_size,
155        })
156    }
157
158    fn lookup_path(&mut self, path: &str) -> io::Result<InodeInfo> {
159        let components: Vec<&str> = path
160            .trim_start_matches('/')
161            .split('/')
162            .filter(|c| !c.is_empty())
163            .collect();
164
165        if components.is_empty() {
166            if path == "/" {
167                return self.read_inode(self.root_nid);
168            }
169            return Err(io::Error::new(io::ErrorKind::InvalidInput, "empty path"));
170        }
171
172        let mut current_nid = self.root_nid;
173        for (i, component) in components.iter().enumerate() {
174            let inode = self.read_inode(current_nid)?;
175            let mode_type = inode.mode & S_IFMT;
176
177            if mode_type != S_IFDIR {
178                return Err(io::Error::new(
179                    io::ErrorKind::NotFound,
180                    format!("not a directory at component '{component}'"),
181                ));
182            }
183
184            let target_nid = self.lookup_in_dir(&inode, component)?;
185            if i + 1 == components.len() {
186                return self.read_inode(target_nid);
187            }
188
189            current_nid = target_nid;
190        }
191
192        Err(io::Error::new(io::ErrorKind::NotFound, "path not found"))
193    }
194
195    /// Look up a named entry in a directory inode's data.
196    ///
197    /// EROFS directory data is organized as self-contained blocks. Each block
198    /// starts with a packed array of 12-byte dirent headers, followed by the
199    /// concatenated name strings. The first dirent's `nameoff` field divided
200    /// by 12 gives the number of dirents in that block (the kernel uses this
201    /// same trick). Name lengths are derived from consecutive `nameoff`
202    /// values; the last entry's name extends to the end of valid data.
203    fn lookup_in_dir(&mut self, dir_inode: &InodeInfo, name: &str) -> io::Result<u32> {
204        let dir_data = self.read_inode_data(dir_inode)?;
205        let blksiz = EROFS_BLKSIZ as usize;
206        let target = name.as_bytes();
207        let block_count = dir_data.len().div_ceil(blksiz);
208        let mut left = 0usize;
209        let mut right = block_count;
210
211        while left < right {
212            let mid = (left + right) / 2;
213            let block = dir_block(&dir_data, mid, blksiz);
214            let dirent_count = dir_block_dirent_count(block)?;
215            let first_name = dirent_name(block, 0, dirent_count)?;
216            let last_name = dirent_name(block, dirent_count - 1, dirent_count)?;
217
218            if target < first_name {
219                right = mid;
220                continue;
221            }
222
223            if target > last_name {
224                left = mid + 1;
225                continue;
226            }
227
228            return lookup_in_dir_block(block, dirent_count, target)?.ok_or_else(|| {
229                io::Error::new(
230                    io::ErrorKind::NotFound,
231                    format!("entry '{name}' not found in directory"),
232                )
233            });
234        }
235
236        Err(io::Error::new(
237            io::ErrorKind::NotFound,
238            format!("entry '{name}' not found in directory"),
239        ))
240    }
241
242    fn read_inode_data(&mut self, inode: &InodeInfo) -> io::Result<Vec<u8>> {
243        let size = inode.size as usize;
244        if size == 0 {
245            return Ok(Vec::new());
246        }
247
248        let blksiz = EROFS_BLKSIZ as usize;
249
250        match inode.data_layout {
251            EROFS_INODE_FLAT_PLAIN => {
252                if inode.startblk_lo == EROFS_NULL_ADDR {
253                    return Ok(Vec::new());
254                }
255                let data_offset = (inode.startblk_lo as u64) * (EROFS_BLKSIZ as u64);
256                let mut data = vec![0u8; size];
257                read_exact_at(&self.file, data_offset, &mut data)?;
258                Ok(data)
259            }
260            EROFS_INODE_FLAT_INLINE => {
261                let full_blocks = size / blksiz;
262                let tail_size = size % blksiz;
263                let mut data = Vec::with_capacity(size);
264
265                // Read full blocks from data area.
266                if full_blocks > 0 && inode.startblk_lo != EROFS_NULL_ADDR {
267                    let data_offset = (inode.startblk_lo as u64) * (EROFS_BLKSIZ as u64);
268                    let mut block_data = vec![0u8; full_blocks * blksiz];
269                    read_exact_at(&self.file, data_offset, &mut block_data)?;
270                    data.extend_from_slice(&block_data);
271                }
272
273                // Read inline tail from after inode metadata.
274                if tail_size > 0 {
275                    let inline_offset = self.inode_offset(inode.nid)
276                        + EROFS_INODE_EXTENDED_SIZE as u64
277                        + inode.xattr_ibody_size as u64;
278                    let mut tail = vec![0u8; tail_size];
279                    read_exact_at(&self.file, inline_offset, &mut tail)?;
280                    data.extend_from_slice(&tail);
281                }
282
283                Ok(data)
284            }
285            _ => Err(io::Error::new(
286                io::ErrorKind::Unsupported,
287                format!("unsupported data layout: {}", inode.data_layout),
288            )),
289        }
290    }
291
292    fn inode_is_opaque(&mut self, inode: &InodeInfo) -> io::Result<bool> {
293        for (name, value) in self.read_inode_xattrs(inode)? {
294            if name == b"trusted.overlay.opaque" && value == b"y" {
295                return Ok(true);
296            }
297        }
298
299        Ok(false)
300    }
301
302    fn read_inode_xattrs(&mut self, inode: &InodeInfo) -> io::Result<Vec<(Vec<u8>, Vec<u8>)>> {
303        if inode.xattr_ibody_size == 0 {
304            return Ok(Vec::new());
305        }
306
307        let total = inode.xattr_ibody_size as usize;
308        if total < EROFS_XATTR_IBODY_HEADER_SIZE as usize {
309            return Err(io::Error::new(
310                io::ErrorKind::InvalidData,
311                "xattr ibody smaller than header",
312            ));
313        }
314
315        let mut offset = self.inode_offset(inode.nid)
316            + EROFS_INODE_EXTENDED_SIZE as u64
317            + EROFS_XATTR_IBODY_HEADER_SIZE as u64;
318        let mut remaining = total - EROFS_XATTR_IBODY_HEADER_SIZE as usize;
319        let mut xattrs = Vec::new();
320
321        while remaining > 0 {
322            if remaining < 4 {
323                return Err(io::Error::new(
324                    io::ErrorKind::InvalidData,
325                    "truncated xattr entry header",
326                ));
327            }
328
329            let mut entry = [0u8; 4];
330            read_exact_at(&self.file, offset, &mut entry)?;
331
332            let name_len = entry[0] as usize;
333            let name_index = entry[1];
334            let value_len = u16::from_le_bytes([entry[2], entry[3]]) as usize;
335            let entry_size = 4 + name_len + value_len;
336            let aligned_size = erofs_xattr_align(entry_size);
337
338            if aligned_size > remaining {
339                return Err(io::Error::new(
340                    io::ErrorKind::InvalidData,
341                    "xattr entry exceeds ibody size",
342                ));
343            }
344
345            let mut suffix = vec![0u8; name_len];
346            read_exact_at(&self.file, offset + 4, &mut suffix)?;
347            let mut value = vec![0u8; value_len];
348            read_exact_at(&self.file, offset + 4 + name_len as u64, &mut value)?;
349
350            let name = match name_index {
351                EROFS_XATTR_INDEX_USER => [b"user.".as_slice(), suffix.as_slice()].concat(),
352                EROFS_XATTR_INDEX_TRUSTED => [b"trusted.".as_slice(), suffix.as_slice()].concat(),
353                EROFS_XATTR_INDEX_SECURITY => [b"security.".as_slice(), suffix.as_slice()].concat(),
354                other => {
355                    return Err(io::Error::new(
356                        io::ErrorKind::InvalidData,
357                        format!("unsupported xattr name index: {other}"),
358                    ));
359                }
360            };
361
362            xattrs.push((name, value));
363            offset += aligned_size as u64;
364            remaining -= aligned_size;
365        }
366
367        Ok(xattrs)
368    }
369}
370
371//--------------------------------------------------------------------------------------------------
372// Types: Internal
373//--------------------------------------------------------------------------------------------------
374
/// The decoded subset of an on-disk inode that the reader needs.
struct InodeInfo {
    /// NID the inode was read from; used to locate its inline area.
    nid: u32,
    /// Raw mode bits, including the file-type bits.
    mode: u16,
    /// Data size in bytes.
    size: u64,
    /// Data layout extracted from bits 1..=3 of the inode's format field.
    data_layout: u8,
    /// The inode's 32-bit `i_u` field read as a start block address.
    startblk_lo: u32,
    /// The same `i_u` field read as a device number.
    rdev: u32,
    /// Size in bytes of the inline xattr area that follows the inode (0 if none).
    xattr_ibody_size: u32,
}
384
385//--------------------------------------------------------------------------------------------------
386// Functions
387//--------------------------------------------------------------------------------------------------
388
/// Fill `buf` completely with bytes read from `file` starting at `offset`.
///
/// Uses positional reads, so the file cursor is neither consulted nor moved.
/// A short read is continued from where it stopped, and a read that fails
/// with `ErrorKind::Interrupted` is retried instead of aborting the whole
/// operation. An empty `buf` succeeds without touching the file.
///
/// # Errors
///
/// Returns `UnexpectedEof` when the file ends before `buf` is full; any other
/// read error is returned unchanged.
fn read_exact_at(file: &File, offset: u64, mut buf: &mut [u8]) -> io::Result<()> {
    let mut current_offset = offset;
    while !buf.is_empty() {
        match file.read_at(buf, current_offset) {
            Ok(0) => {
                return Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "unexpected EOF",
                ));
            }
            Ok(read) => {
                current_offset += read as u64;
                buf = &mut buf[read..];
            }
            // A signal interrupted the call before any data arrived: retry.
            Err(err) if err.kind() == io::ErrorKind::Interrupted => {}
            Err(err) => return Err(err),
        }
    }

    Ok(())
}
405
/// Borrow directory block number `block_idx` out of `dir_data`.
///
/// Blocks are `blksiz` bytes long, except that the final block is cut short
/// at the end of `dir_data`. Panics if the block starts beyond the data.
fn dir_block(dir_data: &[u8], block_idx: usize, blksiz: usize) -> &[u8] {
    let start = block_idx * blksiz;
    let from_start = &dir_data[start..];
    let len = from_start.len().min(blksiz);
    &from_start[..len]
}
411
412fn dir_block_dirent_count(block: &[u8]) -> io::Result<usize> {
413    if block.len() < EROFS_DIRENT_SIZE as usize {
414        return Err(io::Error::new(
415            io::ErrorKind::InvalidData,
416            "directory block smaller than one dirent",
417        ));
418    }
419
420    let first_nameoff = u16::from_le_bytes([block[8], block[9]]) as usize;
421    let dirent_size = EROFS_DIRENT_SIZE as usize;
422    if first_nameoff < dirent_size
423        || !first_nameoff.is_multiple_of(dirent_size)
424        || first_nameoff > block.len()
425    {
426        return Err(io::Error::new(
427            io::ErrorKind::InvalidData,
428            "invalid first dirent name offset",
429        ));
430    }
431
432    Ok(first_nameoff / dirent_size)
433}
434
435fn dirent_name(block: &[u8], idx: usize, dirent_count: usize) -> io::Result<&[u8]> {
436    let dirent_size = EROFS_DIRENT_SIZE as usize;
437    let dirent_off = idx
438        .checked_mul(dirent_size)
439        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "dirent offset overflow"))?;
440
441    if idx >= dirent_count || dirent_off + dirent_size > block.len() {
442        return Err(io::Error::new(
443            io::ErrorKind::InvalidData,
444            "dirent index out of bounds",
445        ));
446    }
447
448    let nameoff = u16::from_le_bytes([block[dirent_off + 8], block[dirent_off + 9]]) as usize;
449    let mut name_end = if idx + 1 < dirent_count {
450        let next_off = dirent_off + dirent_size;
451        u16::from_le_bytes([block[next_off + 8], block[next_off + 9]]) as usize
452    } else {
453        block.len()
454    };
455
456    if nameoff > name_end || name_end > block.len() {
457        return Err(io::Error::new(
458            io::ErrorKind::InvalidData,
459            "dirent name range out of bounds",
460        ));
461    }
462
463    while name_end > nameoff && block[name_end - 1] == 0 {
464        name_end -= 1;
465    }
466
467    Ok(&block[nameoff..name_end])
468}
469
470fn dirent_nid(block: &[u8], idx: usize) -> io::Result<u32> {
471    let dirent_size = EROFS_DIRENT_SIZE as usize;
472    let dirent_off = idx
473        .checked_mul(dirent_size)
474        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "dirent offset overflow"))?;
475    if dirent_off + dirent_size > block.len() {
476        return Err(io::Error::new(
477            io::ErrorKind::InvalidData,
478            "dirent NID out of bounds",
479        ));
480    }
481
482    let nid = u64::from_le_bytes([
483        block[dirent_off],
484        block[dirent_off + 1],
485        block[dirent_off + 2],
486        block[dirent_off + 3],
487        block[dirent_off + 4],
488        block[dirent_off + 5],
489        block[dirent_off + 6],
490        block[dirent_off + 7],
491    ]);
492    u32::try_from(nid)
493        .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "dirent NID overflow"))
494}
495
496fn lookup_in_dir_block(
497    block: &[u8],
498    dirent_count: usize,
499    target: &[u8],
500) -> io::Result<Option<u32>> {
501    let mut left = 0usize;
502    let mut right = dirent_count;
503
504    while left < right {
505        let mid = (left + right) / 2;
506        match target.cmp(dirent_name(block, mid, dirent_count)?) {
507            std::cmp::Ordering::Less => right = mid,
508            std::cmp::Ordering::Greater => left = mid + 1,
509            std::cmp::Ordering::Equal => return dirent_nid(block, mid).map(Some),
510        }
511    }
512
513    Ok(None)
514}
515
516fn inode_kind(inode: &InodeInfo) -> io::Result<ErofsEntryKind> {
517    match inode.mode & S_IFMT {
518        S_IFREG => Ok(ErofsEntryKind::RegularFile),
519        S_IFDIR => Ok(ErofsEntryKind::Directory),
520        S_IFLNK => Ok(ErofsEntryKind::Symlink),
521        S_IFCHR => Ok(ErofsEntryKind::CharDevice),
522        S_IFBLK => Ok(ErofsEntryKind::BlockDevice),
523        S_IFIFO => Ok(ErofsEntryKind::Fifo),
524        S_IFSOCK => Ok(ErofsEntryKind::Socket),
525        other => Err(io::Error::new(
526            io::ErrorKind::InvalidData,
527            format!("unsupported inode mode type: {other:#o}"),
528        )),
529    }
530}
531
532/// Read a file from an EROFS image file on disk.
533pub fn read_file_from_erofs(image_path: &Path, file_path: &str) -> io::Result<Vec<u8>> {
534    let file = std::fs::File::open(image_path)?;
535    let mut reader = ErofsReader::new(file)?;
536    reader.read_file(file_path)
537}
538
539pub fn entry_info_from_erofs(image_path: &Path, file_path: &str) -> io::Result<ErofsEntryInfo> {
540    let file = std::fs::File::open(image_path)?;
541    let mut reader = ErofsReader::new(file)?;
542    reader.entry_info(file_path)
543}
544
545//--------------------------------------------------------------------------------------------------
546// Tests
547//--------------------------------------------------------------------------------------------------
548
#[cfg(test)]
mod tests {
    use std::{fs::File, io};

    use tempfile::tempdir;

    use super::ErofsReader;
    use crate::{
        erofs::write_erofs,
        filetree::{FileData, FileTree, InodeMetadata, RegularFileNode, TreeNode},
    };

    /// Build an in-memory regular-file node with default metadata, no xattrs
    /// and a single link.
    fn make_regular_file(data: &[u8]) -> TreeNode {
        let node = RegularFileNode {
            metadata: InodeMetadata::default(),
            xattrs: Vec::new(),
            data: FileData::Memory(data.to_vec()),
            nlink: 1,
        };
        TreeNode::RegularFile(node)
    }

    /// Write a directory with 5000 entries, then check that the first, a
    /// middle and the last entry resolve and that a missing name reports
    /// `NotFound`.
    #[test]
    fn lookup_path_resolves_large_multi_block_directory() {
        let mut tree = FileTree::new();
        for i in 0..5000 {
            let path = format!("dir/file-{i:04}.txt");
            tree.insert(path.as_bytes(), make_regular_file(b"x"))
                .expect("insert file");
        }

        let output_dir = tempdir().expect("tempdir");
        let output = output_dir.path().join("large-dir.erofs");
        write_erofs(&tree, &output).expect("write erofs");

        let file = File::open(&output).expect("open erofs");
        let mut reader = ErofsReader::new(file).expect("reader");

        let present = [
            ("/dir/file-0000.txt", "first"),
            ("/dir/file-2500.txt", "middle"),
            ("/dir/file-4999.txt", "last"),
        ];
        for (path, label) in present {
            assert_eq!(reader.read_file(path).expect(label), b"x");
        }

        let err = reader
            .entry_info("/dir/file-9999.txt")
            .expect_err("missing entry should fail");
        assert_eq!(err.kind(), io::ErrorKind::NotFound);
    }
}