Skip to main content

microsandbox_image/erofs/
reader.rs

1//! Minimal EROFS reader for extracting file contents from our own images.
2//!
3//! Only supports the subset of EROFS that our writer produces:
4//! - Extended inodes (64 bytes)
5//! - Uncompressed data (FLAT_PLAIN or FLAT_INLINE)
6//! - Sorted directory entries (binary search)
7//! - No shared xattrs, no compression, no chunks
8
9use std::collections::HashSet;
10use std::io::Read;
11use std::os::unix::ffi::{OsStrExt, OsStringExt};
12use std::os::unix::fs::FileExt;
13use std::path::Path;
14use std::{ffi::OsString, fs::File, io, path::PathBuf};
15
16use super::format::{
17    EROFS_BLKSIZ, EROFS_DIRENT_SIZE, EROFS_INODE_EXTENDED_SIZE, EROFS_INODE_FLAT_INLINE,
18    EROFS_INODE_FLAT_PLAIN, EROFS_NULL_ADDR, EROFS_SUPER_OFFSET, EROFS_XATTR_IBODY_HEADER_SIZE,
19    EROFS_XATTR_INDEX_SECURITY, EROFS_XATTR_INDEX_TRUSTED, EROFS_XATTR_INDEX_USER, S_IFBLK,
20    S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, erofs_xattr_align,
21};
22use crate::tree::{InodeMetadata, Xattr};
23
24//--------------------------------------------------------------------------------------------------
25// Types
26//--------------------------------------------------------------------------------------------------
27
/// A handle to an open EROFS image for reading.
///
/// Built by [`ErofsReader::new`], which checks the superblock magic and
/// caches the two superblock fields needed to locate inodes.
pub struct ErofsReader {
    /// Backing image file, accessed with positional reads.
    file: File,
    /// Superblock `meta_blkaddr`: block index where inode metadata starts.
    /// Inode offsets are computed relative to this block.
    meta_blkaddr: u32,
    /// NID of the root directory inode, as recorded in the superblock.
    root_nid: u32,
}
34
/// File type of an entry stored in an EROFS image.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErofsEntryKind {
    /// Regular file.
    RegularFile,
    /// Directory.
    Directory,
    /// Symbolic link.
    Symlink,
    /// Character device node.
    CharDevice,
    /// Block device node.
    BlockDevice,
    /// Named pipe.
    Fifo,
    /// Unix domain socket.
    Socket,
}
45
/// Summary of a single path lookup, as returned by `ErofsReader::entry_info`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ErofsEntryInfo {
    /// File type of the entry.
    pub kind: ErofsEntryKind,
    /// True when the entry is a directory whose inline xattrs contain
    /// `trusted.overlay.opaque` with the value `y`.
    pub opaque: bool,
    /// True when the entry is a character device whose device number is 0.
    pub whiteout: bool,
}
52
/// A filesystem entry discovered while walking an EROFS image.
///
/// The `.` and `..` directory entries are never reported by the walkers.
#[derive(Clone)]
pub struct ErofsTreeEntry {
    /// Path relative to the image root.
    pub path: PathBuf,
    /// Stable EROFS inode identifier.
    pub nid: u32,
    /// Entry kind.
    pub kind: ErofsEntryKind,
    /// POSIX inode metadata.
    pub metadata: InodeMetadata,
    /// Inline xattrs stored on the inode. Names carry their namespace prefix
    /// (`user.`, `trusted.` or `security.`).
    pub xattrs: Vec<Xattr>,
    /// File or symlink data size, copied from the inode's size field.
    pub size: u64,
    /// Device major/minor for device nodes; `None` for every entry that is
    /// not a character or block device.
    pub rdev: Option<(u32, u32)>,
}
71
/// Streaming reader for a regular file stored inside an EROFS image.
///
/// The file's bytes are described by an ordered list of segments within the
/// image; `Read::read` walks those segments front to back.
pub struct ErofsFileDataReader {
    /// Duplicated handle to the image file, read positionally.
    file: File,
    /// `(absolute image offset, length in bytes)` pairs, in logical order.
    segments: Vec<(u64, u64)>,
    /// Index of the segment currently being read.
    segment_index: usize,
    /// Number of bytes already consumed from the current segment.
    segment_offset: u64,
}
79
/// Test-only snapshot of raw inode fields, returned by
/// `ErofsReader::inode_debug_info`.
#[cfg(test)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct ErofsInodeDebugInfo {
    /// Inode identifier the fields were read from.
    pub nid: u32,
    /// Hard-link count stored in the inode.
    pub nlink: u32,
    /// Data size stored in the inode.
    pub size: u64,
    /// Data layout extracted from the inode's `i_format` field.
    pub data_layout: u8,
}
88
89//--------------------------------------------------------------------------------------------------
90// Methods
91//--------------------------------------------------------------------------------------------------
92
93impl ErofsReader {
94    /// Open an EROFS image by parsing the superblock.
95    pub fn new(file: File) -> io::Result<Self> {
96        let mut sb = [0u8; 128];
97        read_exact_at(&file, EROFS_SUPER_OFFSET, &mut sb)?;
98
99        let magic = u32::from_le_bytes([sb[0], sb[1], sb[2], sb[3]]);
100        if magic != 0xE0F5_E1E2 {
101            return Err(io::Error::new(
102                io::ErrorKind::InvalidData,
103                format!("bad EROFS magic: {magic:#x}"),
104            ));
105        }
106
107        let root_nid = u16::from_le_bytes([sb[0x0E], sb[0x0F]]) as u32;
108        let meta_blkaddr = u32::from_le_bytes([sb[0x28], sb[0x29], sb[0x2A], sb[0x2B]]);
109
110        Ok(Self {
111            file,
112            meta_blkaddr,
113            root_nid,
114        })
115    }
116
117    /// Read a file by path from the EROFS image. Returns the file data.
118    pub fn read_file(&mut self, path: &str) -> io::Result<Vec<u8>> {
119        let target_inode = self.lookup_path(path)?;
120        if (target_inode.mode & S_IFMT) != S_IFREG {
121            return Err(io::Error::new(
122                io::ErrorKind::InvalidInput,
123                "target is not a regular file",
124            ));
125        }
126        self.read_inode_data(&target_inode)
127    }
128
129    /// Read a symlink target by path from the EROFS image.
130    pub fn read_link(&mut self, path: &str) -> io::Result<Vec<u8>> {
131        let target_inode = self.lookup_path(path)?;
132        if (target_inode.mode & S_IFMT) != S_IFLNK {
133            return Err(io::Error::new(
134                io::ErrorKind::InvalidInput,
135                "target is not a symlink",
136            ));
137        }
138        self.read_inode_data(&target_inode)
139    }
140
141    pub fn entry_info(&mut self, path: &str) -> io::Result<ErofsEntryInfo> {
142        let inode = self.lookup_path(path)?;
143        let kind = inode_kind(&inode)?;
144        let opaque = if kind == ErofsEntryKind::Directory {
145            self.inode_is_opaque(&inode)?
146        } else {
147            false
148        };
149        let whiteout = kind == ErofsEntryKind::CharDevice && inode.rdev == 0;
150
151        Ok(ErofsEntryInfo {
152            kind,
153            opaque,
154            whiteout,
155        })
156    }
157
158    /// Walk all entries in the image in stable path order.
159    pub fn walk(&mut self) -> io::Result<Vec<ErofsTreeEntry>> {
160        let root = self.read_inode(self.root_nid)?;
161        let mut entries = Vec::new();
162        let mut visited = HashSet::new();
163        self.walk_dir(&root, PathBuf::new(), &mut entries, &mut visited)?;
164        Ok(entries)
165    }
166
167    /// Walk all entries in stable path order, invoking a callback for each entry.
168    pub fn walk_entries<E, F>(&mut self, mut visit: F) -> Result<(), E>
169    where
170        E: From<io::Error>,
171        F: FnMut(&mut Self, ErofsTreeEntry) -> Result<(), E>,
172    {
173        let root = self.read_inode(self.root_nid)?;
174        let mut visited = HashSet::new();
175        self.walk_dir_entries(&root, PathBuf::new(), &mut visited, &mut visit)
176    }
177
178    /// Create a streaming reader for a regular file inode by NID.
179    pub fn file_data_reader(&mut self, nid: u32) -> io::Result<ErofsFileDataReader> {
180        let inode = self.read_inode(nid)?;
181        if (inode.mode & S_IFMT) != S_IFREG {
182            return Err(io::Error::new(
183                io::ErrorKind::InvalidInput,
184                "target is not a regular file",
185            ));
186        }
187
188        Ok(ErofsFileDataReader {
189            file: self.file.try_clone()?,
190            segments: self.inode_data_segments(&inode)?,
191            segment_index: 0,
192            segment_offset: 0,
193        })
194    }
195
196    /// Read a symlink target by NID.
197    pub fn read_link_by_nid(&mut self, nid: u32) -> io::Result<Vec<u8>> {
198        let inode = self.read_inode(nid)?;
199        if (inode.mode & S_IFMT) != S_IFLNK {
200            return Err(io::Error::new(
201                io::ErrorKind::InvalidInput,
202                "target is not a symlink",
203            ));
204        }
205        self.read_inode_data(&inode)
206    }
207
208    #[cfg(test)]
209    pub(crate) fn inode_debug_info(&mut self, path: &str) -> io::Result<ErofsInodeDebugInfo> {
210        let inode = self.lookup_path(path)?;
211        Ok(ErofsInodeDebugInfo {
212            nid: inode.nid,
213            nlink: inode.nlink,
214            size: inode.size,
215            data_layout: inode.data_layout,
216        })
217    }
218
219    fn inode_offset(&self, nid: u32) -> u64 {
220        (self.meta_blkaddr as u64) * (EROFS_BLKSIZ as u64) + (nid as u64) * 32
221    }
222
223    fn read_inode(&mut self, nid: u32) -> io::Result<InodeInfo> {
224        let offset = self.inode_offset(nid);
225
226        let mut buf = [0u8; EROFS_INODE_EXTENDED_SIZE as usize];
227        read_exact_at(&self.file, offset, &mut buf)?;
228
229        let i_format = u16::from_le_bytes([buf[0], buf[1]]);
230        let i_xattr_icount = u16::from_le_bytes([buf[2], buf[3]]);
231        let mode = u16::from_le_bytes([buf[4], buf[5]]);
232        let size = u64::from_le_bytes([
233            buf[8], buf[9], buf[10], buf[11], buf[12], buf[13], buf[14], buf[15],
234        ]);
235        let i_u = u32::from_le_bytes([buf[16], buf[17], buf[18], buf[19]]);
236        let nlink = u32::from_le_bytes([buf[44], buf[45], buf[46], buf[47]]);
237        let uid = u32::from_le_bytes([buf[24], buf[25], buf[26], buf[27]]);
238        let gid = u32::from_le_bytes([buf[28], buf[29], buf[30], buf[31]]);
239        let mtime = u64::from_le_bytes([
240            buf[32], buf[33], buf[34], buf[35], buf[36], buf[37], buf[38], buf[39],
241        ]);
242        let mtime_nsec = u32::from_le_bytes([buf[40], buf[41], buf[42], buf[43]]);
243
244        let data_layout = ((i_format >> 1) & 0x07) as u8;
245
246        // Compute xattr ibody size to know where inline data starts.
247        // Formula from EROFS spec: ibody = 12-byte header + (i_xattr_icount - 1) * 4 bytes.
248        // The "- 1" accounts for the header occupying the first count unit.
249        let xattr_ibody_size = if i_xattr_icount == 0 {
250            0u32
251        } else {
252            12 + ((i_xattr_icount as u32) - 1) * 4
253        };
254
255        Ok(InodeInfo {
256            nid,
257            mode,
258            size,
259            nlink,
260            uid,
261            gid,
262            mtime,
263            mtime_nsec,
264            data_layout,
265            startblk_lo: i_u,
266            rdev: i_u,
267            xattr_ibody_size,
268        })
269    }
270
271    fn lookup_path(&mut self, path: &str) -> io::Result<InodeInfo> {
272        let components: Vec<&str> = path
273            .trim_start_matches('/')
274            .split('/')
275            .filter(|c| !c.is_empty())
276            .collect();
277
278        if components.is_empty() {
279            if path == "/" {
280                return self.read_inode(self.root_nid);
281            }
282            return Err(io::Error::new(io::ErrorKind::InvalidInput, "empty path"));
283        }
284
285        let mut current_nid = self.root_nid;
286        for (i, component) in components.iter().enumerate() {
287            let inode = self.read_inode(current_nid)?;
288            let mode_type = inode.mode & S_IFMT;
289
290            if mode_type != S_IFDIR {
291                return Err(io::Error::new(
292                    io::ErrorKind::NotFound,
293                    format!("not a directory at component '{component}'"),
294                ));
295            }
296
297            let target_nid = self.lookup_in_dir(&inode, component)?;
298            if i + 1 == components.len() {
299                return self.read_inode(target_nid);
300            }
301
302            current_nid = target_nid;
303        }
304
305        Err(io::Error::new(io::ErrorKind::NotFound, "path not found"))
306    }
307
308    /// Look up a named entry in a directory inode's data.
309    ///
310    /// EROFS directory data is organized as self-contained blocks. Each block
311    /// starts with a packed array of 12-byte dirent headers, followed by the
312    /// concatenated name strings. The first dirent's `nameoff` field divided
313    /// by 12 gives the number of dirents in that block (the kernel uses this
314    /// same trick). Name lengths are derived from consecutive `nameoff`
315    /// values; the last entry's name extends to the end of valid data.
316    fn lookup_in_dir(&mut self, dir_inode: &InodeInfo, name: &str) -> io::Result<u32> {
317        let blksiz = EROFS_BLKSIZ as usize;
318        let target = name.as_bytes();
319        let block_count = self.checked_inode_data_len(dir_inode)?.div_ceil(blksiz);
320        let mut left = 0usize;
321        let mut right = block_count;
322
323        while left < right {
324            let mid = (left + right) / 2;
325            let block = self.read_inode_data_block(dir_inode, mid)?;
326            let dirent_count = dir_block_dirent_count(&block)?;
327            let first_name = dirent_name(&block, 0, dirent_count)?;
328            let last_name = dirent_name(&block, dirent_count - 1, dirent_count)?;
329
330            if target < first_name {
331                right = mid;
332                continue;
333            }
334
335            if target > last_name {
336                left = mid + 1;
337                continue;
338            }
339
340            return lookup_in_dir_block(&block, dirent_count, target)?.ok_or_else(|| {
341                io::Error::new(
342                    io::ErrorKind::NotFound,
343                    format!("entry '{name}' not found in directory"),
344                )
345            });
346        }
347
348        Err(io::Error::new(
349            io::ErrorKind::NotFound,
350            format!("entry '{name}' not found in directory"),
351        ))
352    }
353
354    fn walk_dir(
355        &mut self,
356        dir_inode: &InodeInfo,
357        dir_path: PathBuf,
358        entries: &mut Vec<ErofsTreeEntry>,
359        visited: &mut HashSet<u32>,
360    ) -> io::Result<()> {
361        if !visited.insert(dir_inode.nid) {
362            return Err(io::Error::new(
363                io::ErrorKind::InvalidData,
364                "cycle detected while walking EROFS directory tree",
365            ));
366        }
367
368        self.visit_dir_entries::<io::Error, _>(dir_inode, &mut |reader, name, nid| {
369            if name.as_bytes() == b"." || name.as_bytes() == b".." {
370                return Ok(());
371            }
372
373            let path = dir_path.join(&name);
374            let inode = reader.read_inode(nid)?;
375            let entry = reader.tree_entry(path.clone(), &inode)?;
376            let is_dir = entry.kind == ErofsEntryKind::Directory;
377            entries.push(entry);
378
379            if is_dir {
380                reader.walk_dir(&inode, path, entries, visited)?;
381            }
382            Ok(())
383        })?;
384
385        Ok(())
386    }
387
388    fn walk_dir_entries<E, F>(
389        &mut self,
390        dir_inode: &InodeInfo,
391        dir_path: PathBuf,
392        visited: &mut HashSet<u32>,
393        visit: &mut F,
394    ) -> Result<(), E>
395    where
396        E: From<io::Error>,
397        F: FnMut(&mut Self, ErofsTreeEntry) -> Result<(), E>,
398    {
399        if !visited.insert(dir_inode.nid) {
400            return Err(io::Error::new(
401                io::ErrorKind::InvalidData,
402                "cycle detected while walking EROFS directory tree",
403            )
404            .into());
405        }
406
407        self.visit_dir_entries::<E, _>(dir_inode, &mut |reader, name, nid| {
408            if name.as_bytes() == b"." || name.as_bytes() == b".." {
409                return Ok(());
410            }
411
412            let path = dir_path.join(&name);
413            let inode = reader.read_inode(nid)?;
414            let entry = reader.tree_entry(path.clone(), &inode)?;
415            let is_dir = entry.kind == ErofsEntryKind::Directory;
416            visit(reader, entry)?;
417
418            if is_dir {
419                reader.walk_dir_entries(&inode, path, visited, visit)?;
420            }
421            Ok(())
422        })?;
423
424        Ok(())
425    }
426
427    fn visit_dir_entries<E, F>(&mut self, dir_inode: &InodeInfo, visit: &mut F) -> Result<(), E>
428    where
429        E: From<io::Error>,
430        F: FnMut(&mut Self, OsString, u32) -> Result<(), E>,
431    {
432        if (dir_inode.mode & S_IFMT) != S_IFDIR {
433            return Err(
434                io::Error::new(io::ErrorKind::InvalidInput, "target is not a directory").into(),
435            );
436        }
437
438        let blksiz = EROFS_BLKSIZ as usize;
439        let block_count = self.checked_inode_data_len(dir_inode)?.div_ceil(blksiz);
440
441        for block_index in 0..block_count {
442            let block = self.read_inode_data_block(dir_inode, block_index)?;
443            if block.is_empty() {
444                continue;
445            }
446            let dirent_count = dir_block_dirent_count(&block)?;
447            for idx in 0..dirent_count {
448                let name = dirent_name(&block, idx, dirent_count)?;
449                if name.is_empty() {
450                    continue;
451                }
452                visit(
453                    self,
454                    OsString::from_vec(name.to_vec()),
455                    dirent_nid(&block, idx)?,
456                )?;
457            }
458        }
459
460        Ok(())
461    }
462
463    fn tree_entry(&mut self, path: PathBuf, inode: &InodeInfo) -> io::Result<ErofsTreeEntry> {
464        let kind = inode_kind(inode)?;
465        let rdev = if matches!(
466            kind,
467            ErofsEntryKind::CharDevice | ErofsEntryKind::BlockDevice
468        ) {
469            Some(decode_dev(inode.rdev))
470        } else {
471            None
472        };
473
474        Ok(ErofsTreeEntry {
475            path,
476            nid: inode.nid,
477            kind,
478            metadata: inode.metadata(),
479            xattrs: self
480                .read_inode_xattrs(inode)?
481                .into_iter()
482                .map(|(name, value)| Xattr { name, value })
483                .collect(),
484            size: inode.size,
485            rdev,
486        })
487    }
488
489    fn read_inode_data(&mut self, inode: &InodeInfo) -> io::Result<Vec<u8>> {
490        let size = self.checked_inode_data_len(inode)?;
491        if size == 0 {
492            return Ok(Vec::new());
493        }
494
495        let blksiz = EROFS_BLKSIZ as usize;
496
497        match inode.data_layout {
498            EROFS_INODE_FLAT_PLAIN => {
499                if inode.startblk_lo == EROFS_NULL_ADDR {
500                    return Ok(Vec::new());
501                }
502                let data_offset = (inode.startblk_lo as u64) * (EROFS_BLKSIZ as u64);
503                let mut data = vec![0u8; size];
504                read_exact_at(&self.file, data_offset, &mut data)?;
505                Ok(data)
506            }
507            EROFS_INODE_FLAT_INLINE => {
508                let full_blocks = size / blksiz;
509                let tail_size = size % blksiz;
510                let mut data = Vec::with_capacity(size);
511
512                // Read full blocks from data area.
513                if full_blocks > 0 && inode.startblk_lo != EROFS_NULL_ADDR {
514                    let data_offset = (inode.startblk_lo as u64) * (EROFS_BLKSIZ as u64);
515                    let mut block_data = vec![0u8; full_blocks * blksiz];
516                    read_exact_at(&self.file, data_offset, &mut block_data)?;
517                    data.extend_from_slice(&block_data);
518                }
519
520                // Read inline tail from after inode metadata.
521                if tail_size > 0 {
522                    let inline_offset = self.inode_offset(inode.nid)
523                        + EROFS_INODE_EXTENDED_SIZE as u64
524                        + inode.xattr_ibody_size as u64;
525                    let mut tail = vec![0u8; tail_size];
526                    read_exact_at(&self.file, inline_offset, &mut tail)?;
527                    data.extend_from_slice(&tail);
528                }
529
530                Ok(data)
531            }
532            _ => Err(io::Error::new(
533                io::ErrorKind::Unsupported,
534                format!("unsupported data layout: {}", inode.data_layout),
535            )),
536        }
537    }
538
539    fn read_inode_data_block(&self, inode: &InodeInfo, block_index: usize) -> io::Result<Vec<u8>> {
540        let blksiz = EROFS_BLKSIZ as usize;
541        let size = self.checked_inode_data_len(inode)?;
542        let start = block_index.checked_mul(blksiz).ok_or_else(|| {
543            io::Error::new(io::ErrorKind::InvalidData, "directory block overflow")
544        })?;
545        if start >= size {
546            return Ok(Vec::new());
547        }
548
549        let remaining = size - start;
550        let len = remaining.min(blksiz);
551        self.read_inode_data_range(inode, start as u64, len)
552    }
553
554    fn read_inode_data_range(
555        &self,
556        inode: &InodeInfo,
557        start: u64,
558        len: usize,
559    ) -> io::Result<Vec<u8>> {
560        let size = self.checked_inode_data_len(inode)? as u64;
561        let end = start
562            .checked_add(len as u64)
563            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "inode range overflow"))?;
564        if end > size {
565            return Err(io::Error::new(
566                io::ErrorKind::InvalidData,
567                "inode data range exceeds inode size",
568            ));
569        }
570
571        let segments = self.inode_data_segments(inode)?;
572        let mut data = vec![0u8; len];
573        let mut copied = 0usize;
574        let mut logical_start = 0u64;
575
576        for (file_offset, segment_len) in segments {
577            let logical_end = logical_start.checked_add(segment_len).ok_or_else(|| {
578                io::Error::new(io::ErrorKind::InvalidData, "inode segment range overflow")
579            })?;
580            let overlap_start = start.max(logical_start);
581            let overlap_end = end.min(logical_end);
582
583            if overlap_start < overlap_end {
584                let dst_start = (overlap_start - start) as usize;
585                let read_len = (overlap_end - overlap_start) as usize;
586                let source_offset = file_offset
587                    .checked_add(overlap_start - logical_start)
588                    .ok_or_else(|| {
589                        io::Error::new(io::ErrorKind::InvalidData, "inode file offset overflow")
590                    })?;
591                read_exact_at(
592                    &self.file,
593                    source_offset,
594                    &mut data[dst_start..dst_start + read_len],
595                )?;
596                copied += read_len;
597            }
598
599            logical_start = logical_end;
600            if logical_start >= end {
601                break;
602            }
603        }
604
605        if copied != len {
606            return Err(io::Error::new(
607                io::ErrorKind::UnexpectedEof,
608                "inode data range is not fully backed",
609            ));
610        }
611
612        Ok(data)
613    }
614
615    fn checked_inode_data_len(&self, inode: &InodeInfo) -> io::Result<usize> {
616        let file_len = self.file.metadata()?.len();
617        if inode.size > file_len {
618            return Err(io::Error::new(
619                io::ErrorKind::InvalidData,
620                "inode data size exceeds EROFS image size",
621            ));
622        }
623
624        usize::try_from(inode.size).map_err(|_| {
625            io::Error::new(
626                io::ErrorKind::InvalidData,
627                "inode data size does not fit in memory",
628            )
629        })
630    }
631
632    fn inode_data_segments(&self, inode: &InodeInfo) -> io::Result<Vec<(u64, u64)>> {
633        let size = inode.size;
634        if size == 0 {
635            return Ok(Vec::new());
636        }
637
638        let blksiz = EROFS_BLKSIZ as u64;
639        match inode.data_layout {
640            EROFS_INODE_FLAT_PLAIN => {
641                if inode.startblk_lo == EROFS_NULL_ADDR {
642                    Ok(Vec::new())
643                } else {
644                    Ok(vec![((inode.startblk_lo as u64) * blksiz, size)])
645                }
646            }
647            EROFS_INODE_FLAT_INLINE => {
648                let full_blocks = size / blksiz;
649                let tail_size = size % blksiz;
650                let mut segments = Vec::new();
651                if full_blocks > 0 && inode.startblk_lo != EROFS_NULL_ADDR {
652                    segments.push(((inode.startblk_lo as u64) * blksiz, full_blocks * blksiz));
653                }
654                if tail_size > 0 {
655                    segments.push((
656                        self.inode_offset(inode.nid)
657                            + EROFS_INODE_EXTENDED_SIZE as u64
658                            + inode.xattr_ibody_size as u64,
659                        tail_size,
660                    ));
661                }
662                Ok(segments)
663            }
664            _ => Err(io::Error::new(
665                io::ErrorKind::Unsupported,
666                format!("unsupported data layout: {}", inode.data_layout),
667            )),
668        }
669    }
670
671    fn inode_is_opaque(&mut self, inode: &InodeInfo) -> io::Result<bool> {
672        for (name, value) in self.read_inode_xattrs(inode)? {
673            if name == b"trusted.overlay.opaque" && value == b"y" {
674                return Ok(true);
675            }
676        }
677
678        Ok(false)
679    }
680
681    fn read_inode_xattrs(&mut self, inode: &InodeInfo) -> io::Result<Vec<(Vec<u8>, Vec<u8>)>> {
682        if inode.xattr_ibody_size == 0 {
683            return Ok(Vec::new());
684        }
685
686        let total = inode.xattr_ibody_size as usize;
687        if total < EROFS_XATTR_IBODY_HEADER_SIZE as usize {
688            return Err(io::Error::new(
689                io::ErrorKind::InvalidData,
690                "xattr ibody smaller than header",
691            ));
692        }
693
694        let mut offset = self.inode_offset(inode.nid)
695            + EROFS_INODE_EXTENDED_SIZE as u64
696            + EROFS_XATTR_IBODY_HEADER_SIZE as u64;
697        let mut remaining = total - EROFS_XATTR_IBODY_HEADER_SIZE as usize;
698        let mut xattrs = Vec::new();
699
700        while remaining > 0 {
701            if remaining < 4 {
702                return Err(io::Error::new(
703                    io::ErrorKind::InvalidData,
704                    "truncated xattr entry header",
705                ));
706            }
707
708            let mut entry = [0u8; 4];
709            read_exact_at(&self.file, offset, &mut entry)?;
710
711            let name_len = entry[0] as usize;
712            let name_index = entry[1];
713            let value_len = u16::from_le_bytes([entry[2], entry[3]]) as usize;
714            let entry_size = 4 + name_len + value_len;
715            let aligned_size = erofs_xattr_align(entry_size);
716
717            if aligned_size > remaining {
718                return Err(io::Error::new(
719                    io::ErrorKind::InvalidData,
720                    "xattr entry exceeds ibody size",
721                ));
722            }
723
724            let mut suffix = vec![0u8; name_len];
725            read_exact_at(&self.file, offset + 4, &mut suffix)?;
726            let mut value = vec![0u8; value_len];
727            read_exact_at(&self.file, offset + 4 + name_len as u64, &mut value)?;
728
729            let name = match name_index {
730                EROFS_XATTR_INDEX_USER => [b"user.".as_slice(), suffix.as_slice()].concat(),
731                EROFS_XATTR_INDEX_TRUSTED => [b"trusted.".as_slice(), suffix.as_slice()].concat(),
732                EROFS_XATTR_INDEX_SECURITY => [b"security.".as_slice(), suffix.as_slice()].concat(),
733                other => {
734                    return Err(io::Error::new(
735                        io::ErrorKind::InvalidData,
736                        format!("unsupported xattr name index: {other}"),
737                    ));
738                }
739            };
740
741            xattrs.push((name, value));
742            offset += aligned_size as u64;
743            remaining -= aligned_size;
744        }
745
746        Ok(xattrs)
747    }
748}
749
750//--------------------------------------------------------------------------------------------------
751// Types: Internal
752//--------------------------------------------------------------------------------------------------
753
/// Decoded fields of one extended on-disk inode, as produced by
/// `ErofsReader::read_inode`.
struct InodeInfo {
    /// NID this inode was read from.
    nid: u32,
    /// File type and permission bits (`i_mode`).
    mode: u16,
    /// Data size in bytes.
    size: u64,
    // Only consumed by the test-only `inode_debug_info`, hence the allow.
    #[allow(dead_code)]
    nlink: u32,
    /// Owner user id.
    uid: u32,
    /// Owner group id.
    gid: u32,
    /// Modification time, seconds part.
    mtime: u64,
    /// Modification time, nanoseconds part.
    mtime_nsec: u32,
    /// Data layout taken from bits 1..=3 of `i_format`.
    data_layout: u8,
    /// The inode's `i_u` word interpreted as the first data block.
    startblk_lo: u32,
    /// The same `i_u` word interpreted as a device number; which view is
    /// meaningful depends on the file type.
    rdev: u32,
    /// Size in bytes of the inline xattr area following the inode
    /// (0 when the inode has no inline xattrs).
    xattr_ibody_size: u32,
}
769
770impl InodeInfo {
771    fn metadata(&self) -> InodeMetadata {
772        InodeMetadata {
773            uid: self.uid,
774            gid: self.gid,
775            mode: self.mode,
776            mtime: self.mtime,
777            mtime_nsec: self.mtime_nsec,
778        }
779    }
780}
781
782impl ErofsTreeEntry {
783    /// Return true if this directory carries the overlay opaque marker.
784    pub fn is_opaque(&self) -> bool {
785        self.xattrs
786            .iter()
787            .any(|x| x.name == b"trusted.overlay.opaque" && x.value == b"y")
788    }
789}
790
791impl Read for ErofsFileDataReader {
792    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
793        if buf.is_empty() {
794            return Ok(0);
795        }
796
797        while self.segment_index < self.segments.len() {
798            let (offset, len) = self.segments[self.segment_index];
799            if self.segment_offset >= len {
800                self.segment_index += 1;
801                self.segment_offset = 0;
802                continue;
803            }
804
805            let remaining = (len - self.segment_offset) as usize;
806            let to_read = remaining.min(buf.len());
807            let read = self
808                .file
809                .read_at(&mut buf[..to_read], offset + self.segment_offset)?;
810            self.segment_offset += read as u64;
811            return Ok(read);
812        }
813
814        Ok(0)
815    }
816}
817
818//--------------------------------------------------------------------------------------------------
819// Functions
820//--------------------------------------------------------------------------------------------------
821
/// Fill `buf` completely with bytes read from `file` starting at `offset`.
///
/// Uses positional reads, so the file's cursor is not moved. Short reads are
/// continued, and reads interrupted by a signal (`ErrorKind::Interrupted`)
/// are retried, matching the behaviour of the standard library's
/// `read_exact` family.
///
/// # Errors
///
/// Returns `UnexpectedEof` when the file ends before `buf` is full; any other
/// I/O error is passed through unchanged.
fn read_exact_at(file: &File, offset: u64, mut buf: &mut [u8]) -> io::Result<()> {
    let mut current_offset = offset;
    while !buf.is_empty() {
        match file.read_at(buf, current_offset) {
            Ok(0) => {
                return Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "unexpected EOF",
                ));
            }
            Ok(read) => {
                current_offset += read as u64;
                buf = &mut buf[read..];
            }
            // A signal arrived before any data was transferred: just retry.
            Err(err) if err.kind() == io::ErrorKind::Interrupted => {}
            Err(err) => return Err(err),
        }
    }

    Ok(())
}
838
839fn dir_block_dirent_count(block: &[u8]) -> io::Result<usize> {
840    if block.len() < EROFS_DIRENT_SIZE as usize {
841        return Err(io::Error::new(
842            io::ErrorKind::InvalidData,
843            "directory block smaller than one dirent",
844        ));
845    }
846
847    let first_nameoff = u16::from_le_bytes([block[8], block[9]]) as usize;
848    let dirent_size = EROFS_DIRENT_SIZE as usize;
849    if first_nameoff < dirent_size
850        || !first_nameoff.is_multiple_of(dirent_size)
851        || first_nameoff > block.len()
852    {
853        return Err(io::Error::new(
854            io::ErrorKind::InvalidData,
855            "invalid first dirent name offset",
856        ));
857    }
858
859    Ok(first_nameoff / dirent_size)
860}
861
862fn dirent_name(block: &[u8], idx: usize, dirent_count: usize) -> io::Result<&[u8]> {
863    let dirent_size = EROFS_DIRENT_SIZE as usize;
864    let dirent_off = idx
865        .checked_mul(dirent_size)
866        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "dirent offset overflow"))?;
867
868    if idx >= dirent_count || dirent_off + dirent_size > block.len() {
869        return Err(io::Error::new(
870            io::ErrorKind::InvalidData,
871            "dirent index out of bounds",
872        ));
873    }
874
875    let nameoff = u16::from_le_bytes([block[dirent_off + 8], block[dirent_off + 9]]) as usize;
876    let mut name_end = if idx + 1 < dirent_count {
877        let next_off = dirent_off + dirent_size;
878        u16::from_le_bytes([block[next_off + 8], block[next_off + 9]]) as usize
879    } else {
880        block.len()
881    };
882
883    if nameoff > name_end || name_end > block.len() {
884        return Err(io::Error::new(
885            io::ErrorKind::InvalidData,
886            "dirent name range out of bounds",
887        ));
888    }
889
890    while name_end > nameoff && block[name_end - 1] == 0 {
891        name_end -= 1;
892    }
893
894    Ok(&block[nameoff..name_end])
895}
896
897fn dirent_nid(block: &[u8], idx: usize) -> io::Result<u32> {
898    let dirent_size = EROFS_DIRENT_SIZE as usize;
899    let dirent_off = idx
900        .checked_mul(dirent_size)
901        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "dirent offset overflow"))?;
902    if dirent_off + dirent_size > block.len() {
903        return Err(io::Error::new(
904            io::ErrorKind::InvalidData,
905            "dirent NID out of bounds",
906        ));
907    }
908
909    let nid = u64::from_le_bytes([
910        block[dirent_off],
911        block[dirent_off + 1],
912        block[dirent_off + 2],
913        block[dirent_off + 3],
914        block[dirent_off + 4],
915        block[dirent_off + 5],
916        block[dirent_off + 6],
917        block[dirent_off + 7],
918    ]);
919    u32::try_from(nid)
920        .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "dirent NID overflow"))
921}
922
923fn lookup_in_dir_block(
924    block: &[u8],
925    dirent_count: usize,
926    target: &[u8],
927) -> io::Result<Option<u32>> {
928    let mut left = 0usize;
929    let mut right = dirent_count;
930
931    while left < right {
932        let mid = (left + right) / 2;
933        match target.cmp(dirent_name(block, mid, dirent_count)?) {
934            std::cmp::Ordering::Less => right = mid,
935            std::cmp::Ordering::Greater => left = mid + 1,
936            std::cmp::Ordering::Equal => return dirent_nid(block, mid).map(Some),
937        }
938    }
939
940    Ok(None)
941}
942
943fn inode_kind(inode: &InodeInfo) -> io::Result<ErofsEntryKind> {
944    match inode.mode & S_IFMT {
945        S_IFREG => Ok(ErofsEntryKind::RegularFile),
946        S_IFDIR => Ok(ErofsEntryKind::Directory),
947        S_IFLNK => Ok(ErofsEntryKind::Symlink),
948        S_IFCHR => Ok(ErofsEntryKind::CharDevice),
949        S_IFBLK => Ok(ErofsEntryKind::BlockDevice),
950        S_IFIFO => Ok(ErofsEntryKind::Fifo),
951        S_IFSOCK => Ok(ErofsEntryKind::Socket),
952        other => Err(io::Error::new(
953            io::ErrorKind::InvalidData,
954            format!("unsupported inode mode type: {other:#o}"),
955        )),
956    }
957}
958
/// Splits an encoded device number into its `(major, minor)` parts.
///
/// The major number is taken from bits 8..20 of `encoded`. The minor number
/// combines the low 8 bits with bits 20..32, shifted down by 12 so they sit
/// directly above the low byte.
fn decode_dev(encoded: u32) -> (u32, u32) {
    const MAJOR_MASK: u32 = 0x0000_0fff;
    const MINOR_LOW_MASK: u32 = 0x0000_00ff;
    const MINOR_HIGH_MASK: u32 = 0xffff_ff00;

    let minor_low = encoded & MINOR_LOW_MASK;
    let minor_high = (encoded >> 12) & MINOR_HIGH_MASK;

    ((encoded >> 8) & MAJOR_MASK, minor_low | minor_high)
}
964
965/// Read a file from an EROFS image file on disk.
966pub fn read_file_from_erofs(image_path: &Path, file_path: &str) -> io::Result<Vec<u8>> {
967    let file = std::fs::File::open(image_path)?;
968    let mut reader = ErofsReader::new(file)?;
969    reader.read_file(file_path)
970}
971
972pub fn entry_info_from_erofs(image_path: &Path, file_path: &str) -> io::Result<ErofsEntryInfo> {
973    let file = std::fs::File::open(image_path)?;
974    let mut reader = ErofsReader::new(file)?;
975    reader.entry_info(file_path)
976}
977
978//--------------------------------------------------------------------------------------------------
979// Tests
980//--------------------------------------------------------------------------------------------------
981
#[cfg(test)]
mod tests {
    use std::{fs::File, io, path::PathBuf};

    use tempfile::tempdir;

    use super::ErofsReader;
    use crate::{
        erofs::write_erofs,
        tree::{FileData, FileTree, InodeMetadata, RegularFileId, RegularFileNode, TreeNode},
    };

    /// Builds an in-memory regular file node with a freshly created id.
    fn make_regular_file(data: &[u8]) -> TreeNode {
        make_regular_file_with_id(data, RegularFileId::new())
    }

    /// Builds an in-memory regular file node with the given id, default
    /// metadata, no xattrs, and a link count of one.
    fn make_regular_file_with_id(data: &[u8], id: RegularFileId) -> TreeNode {
        let node = RegularFileNode {
            id,
            metadata: InodeMetadata::default(),
            xattrs: Vec::new(),
            data: FileData::Memory(data.to_vec()),
            nlink: 1,
        };
        TreeNode::RegularFile(node)
    }

    /// Writes a 5000-entry directory and checks that the first, a middle, and
    /// the last entry can be read, while a missing name reports `NotFound`.
    #[test]
    fn lookup_path_resolves_large_multi_block_directory() {
        let mut tree = FileTree::new();
        for index in 0..5000 {
            let name = format!("dir/file-{index:04}.txt");
            tree.insert(name.as_bytes(), make_regular_file(b"x"))
                .expect("insert file");
        }

        let scratch = tempdir().expect("tempdir");
        let image = scratch.path().join("large-dir.erofs");
        write_erofs(&tree, &image).expect("write erofs");

        let handle = File::open(&image).expect("open erofs");
        let mut reader = ErofsReader::new(handle).expect("reader");

        let probes = [
            ("/dir/file-0000.txt", "first"),
            ("/dir/file-2500.txt", "middle"),
            ("/dir/file-4999.txt", "last"),
        ];
        for (path, label) in probes {
            assert_eq!(reader.read_file(path).expect(label), b"x");
        }

        let missing = reader.entry_info("/dir/file-9999.txt");
        let err = missing.expect_err("missing entry should fail");
        assert_eq!(err.kind(), io::ErrorKind::NotFound);
    }

    /// Inserts two paths that share one file id and checks that they map to
    /// the same data blocks and the same inode, with a link count of two.
    #[test]
    fn hardlinked_regular_files_share_inode_and_data_blocks() {
        let shared_id = RegularFileId::new();
        let mut tree = FileTree::new();

        tree.insert(b"alpha", make_regular_file_with_id(b"shared", shared_id))
            .expect("insert alpha");
        tree.insert(b"beta", make_regular_file_with_id(b"shared", shared_id))
            .expect("insert beta");

        let scratch = tempdir().expect("tempdir");
        let image = scratch.path().join("hardlinks.erofs");
        let data_map = write_erofs(&tree, &image).expect("write erofs");

        let alpha_path = PathBuf::from("alpha");
        let beta_path = PathBuf::from("beta");
        let alpha_blocks = data_map
            .file_blocks
            .get(&alpha_path)
            .copied()
            .expect("alpha data map");
        let beta_blocks = data_map
            .file_blocks
            .get(&beta_path)
            .copied()
            .expect("beta data map");
        assert_eq!(alpha_blocks, beta_blocks);

        let handle = File::open(&image).expect("open erofs");
        let mut reader = ErofsReader::new(handle).expect("reader");
        let alpha = reader.inode_debug_info("/alpha").expect("alpha inode");
        let beta = reader.inode_debug_info("/beta").expect("beta inode");

        assert_eq!(alpha.nid, beta.nid);
        assert_eq!(alpha.nlink, 2);
        assert_eq!(beta.nlink, 2);
        assert_eq!(alpha.size, b"shared".len() as u64);
        assert_eq!(reader.read_file("/alpha").expect("read alpha"), b"shared");
        assert_eq!(reader.read_file("/beta").expect("read beta"), b"shared");
    }
}