Skip to main content

arcbox_ext4/
reader.rs

1// ext4 filesystem reader.
2//
3// Opens an ext4 disk image, parses the superblock, and builds an in-memory
4// file tree via BFS traversal.  Group descriptors and inodes are cached on
5// first access.
6
7use std::collections::HashMap;
8use std::fs::File;
9use std::io::{Read, Seek, SeekFrom};
10use std::path::Path;
11
12use crate::constants::*;
13use crate::dir;
14use crate::error::{ReadError, ReadResult};
15use crate::extent;
16use crate::file_tree::{BlockRange, FileTree, FileTreeNode, InodeNumber};
17use crate::types::*;
18
19/// Read-only ext4 filesystem reader.
20///
21/// Parses the superblock, lazily reads group descriptors and inodes, and
22/// maintains a [`FileTree`] that mirrors the on-disk directory hierarchy.
23pub struct Reader {
24    pub(crate) file: File,
25    superblock: SuperBlock,
26    group_descriptors: HashMap<u32, GroupDescriptor>,
27    inodes: HashMap<InodeNumber, Inode>,
28    tree: FileTree,
29    /// Paths that are hard links to already-seen inodes.
30    /// Key: path string (e.g. "usr/bin/link"), Value: target inode number.
31    pub hardlinks: HashMap<String, InodeNumber>,
32}
33
34impl Reader {
35    /// Open an ext4 disk image at `path` and build the in-memory file tree.
36    ///
37    /// The constructor:
38    /// 1. Reads and validates the superblock.
39    /// 2. BFS-traverses the directory tree from the root inode.
40    /// 3. Records hard links (paths whose inode was already visited).
41    /// 4. Reads extent information for every discovered entry.
42    pub fn new(path: &Path) -> ReadResult<Self> {
43        let mut file = File::open(path).map_err(|_| ReadError::NotFound(path.to_path_buf()))?;
44
45        // -- Parse superblock --------------------------------------------------
46        file.seek(SeekFrom::Start(SUPERBLOCK_OFFSET))?;
47        let mut sb_buf = [0u8; SUPERBLOCK_SIZE];
48        file.read_exact(&mut sb_buf).map_err(|_| {
49            ReadError::CouldNotReadSuperBlock(path.to_path_buf(), SUPERBLOCK_OFFSET, SUPERBLOCK_SIZE)
50        })?;
51        let superblock = SuperBlock::read_from(&sb_buf);
52
53        if superblock.magic != SUPERBLOCK_MAGIC {
54            return Err(ReadError::InvalidSuperBlock);
55        }
56
57        let mut reader = Self {
58            file,
59            superblock,
60            group_descriptors: HashMap::new(),
61            inodes: HashMap::new(),
62            tree: FileTree::new(ROOT_INODE, "."),
63            hardlinks: HashMap::new(),
64        };
65
66        // -- BFS traversal to build the file tree ------------------------------
67        // Each work item is (tree node index, inode number).
68        let mut queue: Vec<(usize, InodeNumber)> = vec![(reader.tree.root(), ROOT_INODE)];
69
70        while let Some((parent_idx, inode_num)) = queue.pop() {
71            let children = reader.get_dir_entries(inode_num)?;
72
73            for (name, child_ino) in children {
74                // Skip the "." and ".." pseudo-entries.
75                if name == "." || name == ".." {
76                    continue;
77                }
78
79                // If we have already seen this inode, this entry is a hard link.
80                if reader.inodes.contains_key(&child_ino) {
81                    let parent_path = reader.tree.node_path(parent_idx);
82                    let full = parent_path.join(&name);
83                    // Store as a forward-slash path without leading "/".
84                    let key = full
85                        .to_string_lossy()
86                        .trim_start_matches('/')
87                        .trim_start_matches("./")
88                        .to_string();
89                    reader.hardlinks.insert(key, child_ino);
90                    continue;
91                }
92
93                // Read extents for this entry.
94                let blocks = extent::parse_extents(
95                    &reader.get_inode(child_ino)?,
96                    reader.block_size(),
97                    &mut reader.file,
98                )?;
99
100                let mut node = FileTreeNode {
101                    inode: child_ino,
102                    name: name.clone(),
103                    children: Vec::new(),
104                    parent: None,
105                    blocks: None,
106                    additional_blocks: Vec::new(),
107                    link: None,
108                };
109
110                // Map extent ranges to BlockRange values.
111                if let Some(first) = blocks.first() {
112                    node.blocks = Some(BlockRange {
113                        start: first.0,
114                        end: first.1,
115                    });
116                }
117                for range in blocks.iter().skip(1) {
118                    node.additional_blocks.push(BlockRange {
119                        start: range.0,
120                        end: range.1,
121                    });
122                }
123
124                let child_idx = reader.tree.add_child(parent_idx, node);
125
126                // If the child is a directory, enqueue it for further traversal.
127                let child_inode = reader.get_inode(child_ino)?;
128                if child_inode.is_dir() {
129                    queue.push((child_idx, child_ino));
130                }
131            }
132        }
133
134        Ok(reader)
135    }
136
137    // -- Public accessors ------------------------------------------------------
138
139    /// Borrow the parsed superblock.
140    pub fn superblock(&self) -> &SuperBlock {
141        &self.superblock
142    }
143
144    /// Borrow the in-memory file tree.
145    pub fn tree(&self) -> &FileTree {
146        &self.tree
147    }
148
149    // -- Block size helpers ----------------------------------------------------
150
151    /// Filesystem block size in bytes, derived from `log_block_size`.
152    pub(crate) fn block_size(&self) -> u64 {
153        1024 * (1u64 << self.superblock.log_block_size)
154    }
155
156    /// On-disk group descriptor size.  When the 64-bit feature flag is set, the
157    /// superblock's `desc_size` is used; otherwise the base 32-byte descriptor.
158    fn group_descriptor_size(&self) -> u16 {
159        if self.superblock.feature_incompat & incompat::BIT64 != 0 {
160            self.superblock.desc_size
161        } else {
162            GroupDescriptor::SIZE as u16
163        }
164    }
165
166    // -- Low-level I/O ---------------------------------------------------------
167
168    /// Read a group descriptor from disk (uncached).
169    fn read_group_descriptor(&mut self, number: u32) -> ReadResult<GroupDescriptor> {
170        let bs = self.block_size();
171        let offset = bs + number as u64 * self.group_descriptor_size() as u64;
172        self.file.seek(SeekFrom::Start(offset))?;
173
174        let mut buf = [0u8; 64]; // Enough for both 32- and 64-byte descriptors.
175        let read_len = GroupDescriptor::SIZE;
176        self.file
177            .read_exact(&mut buf[..read_len])
178            .map_err(|_| ReadError::CouldNotReadGroup(number))?;
179
180        Ok(GroupDescriptor::read_from(&buf))
181    }
182
183    /// Get a group descriptor, reading from disk on first access.
184    pub fn get_group_descriptor(&mut self, number: u32) -> ReadResult<GroupDescriptor> {
185        if let Some(gd) = self.group_descriptors.get(&number) {
186            return Ok(gd.clone());
187        }
188        let gd = self.read_group_descriptor(number)?;
189        self.group_descriptors.insert(number, gd.clone());
190        Ok(gd)
191    }
192
193    /// Read an inode from disk (uncached).
194    fn read_inode(&mut self, number: u32) -> ReadResult<Inode> {
195        let group = (number - 1) / self.superblock.inodes_per_group;
196        let index_in_group = ((number - 1) % self.superblock.inodes_per_group) as u64;
197        let gd = self.get_group_descriptor(group)?;
198        let table_start = gd.inode_table_lo as u64 * self.block_size();
199        let inode_offset = table_start + index_in_group * self.superblock.inode_size as u64;
200
201        self.file.seek(SeekFrom::Start(inode_offset))?;
202
203        let mut buf = [0u8; INODE_SIZE as usize];
204        self.file
205            .read_exact(&mut buf)
206            .map_err(|_| ReadError::CouldNotReadInode(number))?;
207
208        Ok(Inode::read_from(&buf))
209    }
210
211    /// Get an inode, reading from disk on first access.
212    pub fn get_inode(&mut self, number: u32) -> ReadResult<Inode> {
213        if let Some(inode) = self.inodes.get(&number) {
214            return Ok(inode.clone());
215        }
216        let inode = self.read_inode(number)?;
217        self.inodes.insert(number, inode.clone());
218        Ok(inode)
219    }
220
221    /// Parse directory entries for the given directory inode.
222    ///
223    /// Reads the extent tree to find the directory's data blocks, then parses
224    /// each block with [`dir::parse_dir_entries`].  Results are sorted
225    /// alphabetically by name for deterministic traversal.
226    fn get_dir_entries(&mut self, inode_number: InodeNumber) -> ReadResult<Vec<(String, InodeNumber)>> {
227        let inode = self.get_inode(inode_number)?;
228        let extents = extent::parse_extents(&inode, self.block_size(), &mut self.file)?;
229        let bs = self.block_size() as usize;
230
231        let mut entries = Vec::new();
232
233        for (phys_start, phys_end) in &extents {
234            self.seek_to_block(*phys_start)?;
235            let num_blocks = phys_end - phys_start;
236            for i in 0..num_blocks {
237                let mut block_buf = vec![0u8; bs];
238                self.file
239                    .read_exact(&mut block_buf)
240                    .map_err(|_| ReadError::CouldNotReadBlock(phys_start + i))?;
241                let block_entries = dir::parse_dir_entries(&block_buf);
242                entries.extend(block_entries);
243            }
244        }
245
246        // Sort alphabetically for deterministic ordering.
247        entries.sort_by(|a, b| a.0.cmp(&b.0));
248        Ok(entries)
249    }
250
251    /// List the children of a directory inode (public wrapper around
252    /// [`get_dir_entries`]).
253    pub fn children_of(&mut self, number: InodeNumber) -> ReadResult<Vec<(String, InodeNumber)>> {
254        self.get_dir_entries(number)
255    }
256
257    /// Seek the underlying file handle to the start of a physical block.
258    fn seek_to_block(&mut self, block: u32) -> ReadResult<()> {
259        self.file
260            .seek(SeekFrom::Start(block as u64 * self.block_size()))?;
261        Ok(())
262    }
263}
264
265#[cfg(test)]
266mod tests {
267    use super::*;
268    use crate::Formatter;
269
270    /// Helper: create a formatter backed by a temp file, returning the reader
271    /// after closing the formatter.
272    fn make_reader_with<F>(setup: F) -> Reader
273    where
274        F: FnOnce(&mut Formatter),
275    {
276        let tmp = tempfile::NamedTempFile::new().unwrap();
277        let mut fmt = Formatter::new(tmp.path(), 4096, 256 * 1024).unwrap();
278        setup(&mut fmt);
279        fmt.close().unwrap();
280        Reader::new(tmp.path()).unwrap()
281    }
282
283    #[test]
284    fn test_superblock_fields_after_roundtrip() {
285        let reader = make_reader_with(|_fmt| {
286            // Empty filesystem -- just root + lost+found.
287        });
288
289        let sb = reader.superblock();
290
291        // The magic number must be the ext4 signature.
292        assert_eq!(sb.magic, SUPERBLOCK_MAGIC);
293
294        // log_block_size=2 means 1024 * (1 << 2) = 4096 bytes per block.
295        assert_eq!(sb.log_block_size, 2);
296        assert_eq!(reader.block_size(), 4096);
297
298        // The first non-reserved inode must be FIRST_INODE (11).
299        assert_eq!(sb.first_ino, FIRST_INODE);
300
301        // Inode size should be 256.
302        assert_eq!(sb.inode_size, INODE_SIZE as u16);
303
304        // The extents feature flag must be set.
305        assert_ne!(sb.feature_incompat & incompat::EXTENTS, 0);
306    }
307
308    #[test]
309    fn test_children_of_root_inode() {
310        let mut reader = make_reader_with(|fmt| {
311            // Create a few entries in the root directory.
312            fmt.create(
313                "/alpha",
314                make_mode(file_mode::S_IFREG, 0o644),
315                None,
316                None,
317                Some(&mut "a".as_bytes()),
318                None,
319                None,
320                None,
321            )
322            .unwrap();
323            fmt.create(
324                "/beta",
325                make_mode(file_mode::S_IFDIR, 0o755),
326                None,
327                None,
328                None,
329                None,
330                None,
331                None,
332            )
333            .unwrap();
334            fmt.create(
335                "/gamma.txt",
336                make_mode(file_mode::S_IFREG, 0o600),
337                None,
338                None,
339                Some(&mut "g".as_bytes()),
340                None,
341                None,
342                None,
343            )
344            .unwrap();
345        });
346
347        let children = reader.children_of(ROOT_INODE).unwrap();
348
349        // Filter out "." and ".." to get real entries.
350        let names: Vec<&str> = children
351            .iter()
352            .filter(|(n, _)| n != "." && n != "..")
353            .map(|(n, _)| n.as_str())
354            .collect();
355
356        // lost+found is always created by the formatter, plus our three entries.
357        assert!(names.contains(&"lost+found"));
358        assert!(names.contains(&"alpha"));
359        assert!(names.contains(&"beta"));
360        assert!(names.contains(&"gamma.txt"));
361        assert_eq!(names.len(), 4);
362    }
363
364    #[test]
365    fn test_get_inode_root() {
366        let mut reader = make_reader_with(|_fmt| {});
367
368        // The root inode should be a directory.
369        let root_inode = reader.get_inode(ROOT_INODE).unwrap();
370        assert!(root_inode.is_dir());
371        assert!(!root_inode.is_reg());
372        assert!(!root_inode.is_link());
373    }
374
375    #[test]
376    fn test_block_size_calculation() {
377        // 4096-byte blocks: log_block_size should be 2.
378        let reader = make_reader_with(|_fmt| {});
379        assert_eq!(reader.block_size(), 4096);
380    }
381}