Skip to main content

arcbox_ext4/
reader_io.rs

// Path resolution and file I/O for the ext4 reader.
//
// Adds high-level operations (`exists`, `stat`, `list_dir`, `read_file`) on
// top of the low-level [`Reader`] built in `reader.rs`.  Path resolution
// follows POSIX semantics including symlink traversal with loop detection.
6
7use std::collections::HashSet;
8use std::io::{Read, Seek, SeekFrom};
9
10use crate::constants::*;
11use crate::error::{ReadError, ReadResult};
12use crate::extent;
13use crate::file_tree::InodeNumber;
14use crate::reader::Reader;
15use crate::types::*;
16
/// Maximum number of symlink hops before we declare a loop.
///
/// Path resolution counts every symlink it follows and fails with
/// `ReadError::SymlinkLoop` once the count exceeds this value.
// NOTE(review): 40 presumably mirrors the Linux kernel's symlink limit -- confirm.
const MAX_SYMLINK_HOPS: usize = 40;
19
20// ---------------------------------------------------------------------------
21// Public API
22// ---------------------------------------------------------------------------
23
24impl Reader {
25    /// Check whether `path` exists in this ext4 filesystem.
26    pub fn exists(&mut self, path: &str) -> bool {
27        self.resolve_path(path, true).is_ok()
28    }
29
30    /// Return `(inode_number, inode)` for the given `path`, following symlinks.
31    pub fn stat(&mut self, path: &str) -> ReadResult<(InodeNumber, Inode)> {
32        self.resolve_path(path, true)
33    }
34
35    /// Return `(inode_number, inode)` for the given `path` **without** following
36    /// the final symlink component.
37    pub fn stat_no_follow(&mut self, path: &str) -> ReadResult<(InodeNumber, Inode)> {
38        self.resolve_path(path, false)
39    }
40
41    /// List a directory's entries (names only, excluding "." and "..").
42    pub fn list_dir(&mut self, path: &str) -> ReadResult<Vec<String>> {
43        let (ino_num, inode) = self.stat(path)?;
44        if !inode.is_dir() {
45            return Err(ReadError::NotADirectory(path.to_string()));
46        }
47        let children = self.children_of(ino_num)?;
48        Ok(children
49            .into_iter()
50            .filter(|(name, _)| name != "." && name != "..")
51            .map(|(name, _)| name)
52            .collect())
53    }
54
55    /// Read file contents at `path` starting at `offset`.
56    ///
57    /// If `count` is `None`, reads to EOF.  Returns the bytes read (may be
58    /// shorter than requested if the file is smaller).
59    pub fn read_file(
60        &mut self,
61        path: &str,
62        offset: u64,
63        count: Option<usize>,
64    ) -> ReadResult<Vec<u8>> {
65        let (ino_num, inode) = self.stat(path)?;
66
67        if inode.is_dir() {
68            return Err(ReadError::IsDirectory(path.to_string()));
69        }
70        if !inode.is_reg() {
71            return Err(ReadError::NotAFile(path.to_string()));
72        }
73
74        let file_size = inode.file_size();
75        let start = offset.min(file_size);
76        let max_readable = file_size - start;
77        let want = match count {
78            Some(c) => (c as u64).min(max_readable),
79            None => max_readable,
80        };
81
82        if want == 0 {
83            return Ok(Vec::new());
84        }
85
86        self.read_from_extents(ino_num, start, want)
87    }
88
89    /// Read file contents into a pre-allocated buffer.  Returns the number of
90    /// bytes actually written to `buf` (may be less than `buf.len()` at EOF).
91    pub fn read_file_into(
92        &mut self,
93        path: &str,
94        buf: &mut [u8],
95        offset: u64,
96    ) -> ReadResult<usize> {
97        let data = self.read_file(path, offset, Some(buf.len()))?;
98        let n = data.len().min(buf.len());
99        buf[..n].copy_from_slice(&data[..n]);
100        Ok(n)
101    }
102}
103
104// ---------------------------------------------------------------------------
105// Path resolution
106// ---------------------------------------------------------------------------
107
108impl Reader {
109    /// Resolve `path` to an inode, optionally following symlinks.
110    ///
111    /// * Absolute paths start from the root inode.
112    /// * Relative paths also start from the root (there is no "cwd").
113    /// * "." is a no-op, ".." goes to the parent (via a parent stack).
114    /// * Symlinks are recursively resolved up to [`MAX_SYMLINK_HOPS`].
115    fn resolve_path(
116        &mut self,
117        path: &str,
118        follow_symlinks: bool,
119    ) -> ReadResult<(InodeNumber, Inode)> {
120        let mut components = normalize_path(path);
121        let mut current: InodeNumber = ROOT_INODE;
122        let mut parent_stack: Vec<InodeNumber> = Vec::new();
123        let mut visited: HashSet<InodeNumber> = HashSet::new();
124        let mut hops: usize = 0;
125        let mut idx: usize = 0;
126
127        while idx < components.len() {
128            let name = components[idx].clone();
129
130            // "." -- stay in the current directory.
131            if name == "." {
132                idx += 1;
133                continue;
134            }
135
136            // ".." -- go to parent.
137            if name == ".." {
138                if current != ROOT_INODE {
139                    if let Some(parent) = parent_stack.pop() {
140                        current = parent;
141                    } else {
142                        // Fallback: look up ".." in the directory entries.
143                        let entries = self.children_of(current)?;
144                        if let Some((_, parent_ino)) = entries.iter().find(|(n, _)| n == "..") {
145                            current = *parent_ino;
146                        }
147                    }
148                }
149                idx += 1;
150                continue;
151            }
152
153            // Regular component -- current must be a directory.
154            let current_inode = self.get_inode(current)?;
155            if !current_inode.is_dir() {
156                return Err(ReadError::NotADirectory(name));
157            }
158
159            // Look up the child by name.
160            let entries = self.children_of(current)?;
161            let child_ino = entries
162                .iter()
163                .find(|(n, _)| *n == name)
164                .map(|(_, ino)| *ino)
165                .ok_or_else(|| ReadError::PathNotFound(name.clone()))?;
166
167            let child_inode = self.get_inode(child_ino)?;
168
169            if child_inode.is_link() && follow_symlinks {
170                // Symlink loop detection.
171                if visited.contains(&child_ino) {
172                    return Err(ReadError::SymlinkLoop(path.to_string()));
173                }
174                visited.insert(child_ino);
175
176                hops += 1;
177                if hops > MAX_SYMLINK_HOPS {
178                    return Err(ReadError::SymlinkLoop(path.to_string()));
179                }
180
181                // Read the symlink target.
182                let target = self.read_symlink_target(&child_inode, child_ino)?;
183                if target.is_empty() {
184                    return Err(ReadError::InvalidPath("empty symlink target".to_string()));
185                }
186
187                let target_components = normalize_path(&target);
188
189                if target.starts_with('/') {
190                    // Absolute symlink: restart from root.
191                    current = ROOT_INODE;
192                    parent_stack.clear();
193                    // Replace all remaining components with target + rest.
194                    let rest: Vec<String> = components[idx + 1..].to_vec();
195                    components = [target_components, rest].concat();
196                    idx = 0;
197                } else {
198                    // Relative symlink: splice target into the component list.
199                    let before: Vec<String> = components[..idx].to_vec();
200                    let rest: Vec<String> = components[idx + 1..].to_vec();
201                    components = [before, target_components, rest].concat();
202                    // Do not advance idx -- re-process from the splice point.
203                }
204            } else {
205                // Not a symlink (or not following) -- descend.
206                parent_stack.push(current);
207                current = child_ino;
208                idx += 1;
209            }
210        }
211
212        let final_inode = self.get_inode(current)?;
213        Ok((current, final_inode))
214    }
215}
216
217// ---------------------------------------------------------------------------
218// File reading from extents
219// ---------------------------------------------------------------------------
220
221impl Reader {
222    /// Read `count` bytes starting at byte offset `start` from the file
223    /// described by `inode_number`'s extent tree.
224    fn read_from_extents(
225        &mut self,
226        inode_number: InodeNumber,
227        start: u64,
228        count: u64,
229    ) -> ReadResult<Vec<u8>> {
230        let inode = self.get_inode(inode_number)?;
231        let extents = extent::parse_extents(&inode, self.block_size(), &mut self.file)?;
232
233        if extents.is_empty() {
234            return Ok(Vec::new());
235        }
236
237        let bs = self.block_size();
238        let req_end = start + count;
239        let mut out = Vec::with_capacity(count as usize);
240        let mut logical_offset: u64 = 0;
241
242        for (phys_start, phys_end) in &extents {
243            let extent_bytes = (*phys_end as u64 - *phys_start as u64) * bs;
244            let logical_end = logical_offset + extent_bytes;
245
246            // Skip extents entirely before the requested range.
247            if logical_end <= start {
248                logical_offset = logical_end;
249                continue;
250            }
251            // Stop once we have passed the requested range.
252            if logical_offset >= req_end {
253                break;
254            }
255
256            // Compute the overlap between the requested range and this extent.
257            let overlap_start = start.max(logical_offset);
258            let overlap_end = req_end.min(logical_end);
259            let mut remaining = overlap_end - overlap_start;
260
261            if remaining == 0 {
262                logical_offset = logical_end;
263                continue;
264            }
265
266            // Seek to the correct byte within this extent.
267            let offset_into_extent = overlap_start - logical_offset;
268            let abs_byte_offset = *phys_start as u64 * bs + offset_into_extent;
269            self.file.seek(SeekFrom::Start(abs_byte_offset))?;
270
271            // Read in chunks of up to 1 MiB to avoid enormous single reads.
272            while remaining > 0 {
273                let chunk = remaining.min(1 << 20) as usize;
274                let mut buf = vec![0u8; chunk];
275                let n = self.file.read(&mut buf)?;
276                if n == 0 {
277                    break; // EOF
278                }
279                out.extend_from_slice(&buf[..n]);
280                remaining -= n as u64;
281            }
282
283            logical_offset = logical_end;
284            if out.len() >= count as usize {
285                break;
286            }
287        }
288
289        // Truncate to exactly `count` bytes in case we over-read.
290        out.truncate(count as usize);
291        Ok(out)
292    }
293
294    /// Read the target of a symbolic link.
295    ///
296    /// Fast symlinks (< 60 bytes) store the target directly in the inode's
297    /// `block` field.  Longer targets are stored in data blocks referenced by
298    /// the inode's extent tree.
299    fn read_symlink_target(
300        &mut self,
301        inode: &Inode,
302        inode_number: InodeNumber,
303    ) -> ReadResult<String> {
304        let size = inode.file_size();
305        if size == 0 {
306            return Ok(String::new());
307        }
308
309        if size < INODE_BLOCK_SIZE as u64 {
310            // Fast symlink: target is stored inline in the block field.
311            let bytes = &inode.block[..size as usize];
312            return Ok(String::from_utf8_lossy(bytes).into_owned());
313        }
314
315        // Slow symlink: read from extents.
316        let data = self.read_from_extents(inode_number, 0, size)?;
317        Ok(String::from_utf8_lossy(&data).into_owned())
318    }
319}
320
321// ---------------------------------------------------------------------------
322// Helpers
323// ---------------------------------------------------------------------------
324
/// Split `path` on "/" into owned components.
///
/// A leading "/" is ignored, and empty components (from consecutive or
/// trailing slashes) are dropped.  "." and ".." are kept verbatim; they are
/// interpreted later, during resolution.  The empty path and "/" both yield
/// an empty list.
fn normalize_path(path: &str) -> Vec<String> {
    let mut parts = Vec::new();
    for piece in path.split('/') {
        if !piece.is_empty() {
            parts.push(piece.to_owned());
        }
    }
    parts
}
338
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `path` normalizes to exactly the components in `expected`.
    fn check(path: &str, expected: &[&str]) {
        assert_eq!(normalize_path(path), expected);
    }

    #[test]
    fn test_normalize_path_absolute() {
        check("/etc/passwd", &["etc", "passwd"]);
    }

    #[test]
    fn test_normalize_path_relative() {
        check("etc/passwd", &["etc", "passwd"]);
    }

    #[test]
    fn test_normalize_path_root() {
        check("/", &[]);
    }

    #[test]
    fn test_normalize_path_empty() {
        check("", &[]);
    }

    #[test]
    fn test_normalize_path_consecutive_slashes() {
        check("//a///b//", &["a", "b"]);
    }

    #[test]
    fn test_normalize_path_dots() {
        // Dot components survive normalization; traversal interprets them.
        check("/a/./b/../c", &["a", ".", "b", "..", "c"]);
    }

    #[test]
    fn test_normalize_path_single_component() {
        for path in ["/file.txt", "file.txt"] {
            check(path, &["file.txt"]);
        }
    }

    #[test]
    fn test_normalize_path_trailing_slash() {
        // No empty component may appear after a trailing slash.
        check("/etc/", &["etc"]);
        check("/a/b/c/", &["a", "b", "c"]);
    }

    #[test]
    fn test_normalize_path_deeply_nested() {
        check("/a/b/c/d/e/f", &["a", "b", "c", "d", "e", "f"]);
    }
}