1use std::os::unix::fs::FileExt;
10use std::path::Path;
11use std::{fs::File, io};
12
13use super::format::{
14 EROFS_BLKSIZ, EROFS_DIRENT_SIZE, EROFS_INODE_EXTENDED_SIZE, EROFS_INODE_FLAT_INLINE,
15 EROFS_INODE_FLAT_PLAIN, EROFS_NULL_ADDR, EROFS_SUPER_OFFSET, EROFS_XATTR_IBODY_HEADER_SIZE,
16 EROFS_XATTR_INDEX_SECURITY, EROFS_XATTR_INDEX_TRUSTED, EROFS_XATTR_INDEX_USER, S_IFBLK,
17 S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, erofs_xattr_align,
18};
19
/// Read-only accessor for an EROFS filesystem image backed by a [`File`].
///
/// The reader keeps only the two superblock values needed to locate inodes;
/// everything else is fetched from the image on demand with positional reads.
pub struct ErofsReader {
    /// Handle to the image; all reads are positional, so no cursor state is kept.
    file: File,
    /// Block address where the metadata (inode) area starts, taken from the
    /// 32-bit superblock field at byte offset `0x28`.
    meta_blkaddr: u32,
    /// Inode number of the root directory, taken from the 16-bit superblock
    /// field at byte offset `0x0E` and widened to `u32`.
    root_nid: u32,
}
30
/// File type of an entry, derived from the type bits of its inode mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErofsEntryKind {
    /// Regular file (`S_IFREG`).
    RegularFile,
    /// Directory (`S_IFDIR`).
    Directory,
    /// Symbolic link (`S_IFLNK`).
    Symlink,
    /// Character device (`S_IFCHR`).
    CharDevice,
    /// Block device (`S_IFBLK`).
    BlockDevice,
    /// Named pipe (`S_IFIFO`).
    Fifo,
    /// Unix domain socket (`S_IFSOCK`).
    Socket,
}
41
42#[derive(Debug, Clone, PartialEq, Eq)]
43pub struct ErofsEntryInfo {
44 pub kind: ErofsEntryKind,
45 pub opaque: bool,
46 pub whiteout: bool,
47}
48
49impl ErofsReader {
54 pub fn new(file: File) -> io::Result<Self> {
56 let mut sb = [0u8; 128];
57 read_exact_at(&file, EROFS_SUPER_OFFSET, &mut sb)?;
58
59 let magic = u32::from_le_bytes([sb[0], sb[1], sb[2], sb[3]]);
60 if magic != 0xE0F5_E1E2 {
61 return Err(io::Error::new(
62 io::ErrorKind::InvalidData,
63 format!("bad EROFS magic: {magic:#x}"),
64 ));
65 }
66
67 let root_nid = u16::from_le_bytes([sb[0x0E], sb[0x0F]]) as u32;
68 let meta_blkaddr = u32::from_le_bytes([sb[0x28], sb[0x29], sb[0x2A], sb[0x2B]]);
69
70 Ok(Self {
71 file,
72 meta_blkaddr,
73 root_nid,
74 })
75 }
76
77 pub fn read_file(&mut self, path: &str) -> io::Result<Vec<u8>> {
79 let target_inode = self.lookup_path(path)?;
80 if (target_inode.mode & S_IFMT) != S_IFREG {
81 return Err(io::Error::new(
82 io::ErrorKind::InvalidInput,
83 "target is not a regular file",
84 ));
85 }
86 self.read_inode_data(&target_inode)
87 }
88
89 pub fn read_link(&mut self, path: &str) -> io::Result<Vec<u8>> {
91 let target_inode = self.lookup_path(path)?;
92 if (target_inode.mode & S_IFMT) != S_IFLNK {
93 return Err(io::Error::new(
94 io::ErrorKind::InvalidInput,
95 "target is not a symlink",
96 ));
97 }
98 self.read_inode_data(&target_inode)
99 }
100
101 pub fn entry_info(&mut self, path: &str) -> io::Result<ErofsEntryInfo> {
102 let inode = self.lookup_path(path)?;
103 let kind = inode_kind(&inode)?;
104 let opaque = if kind == ErofsEntryKind::Directory {
105 self.inode_is_opaque(&inode)?
106 } else {
107 false
108 };
109 let whiteout = kind == ErofsEntryKind::CharDevice && inode.rdev == 0;
110
111 Ok(ErofsEntryInfo {
112 kind,
113 opaque,
114 whiteout,
115 })
116 }
117
118 fn inode_offset(&self, nid: u32) -> u64 {
119 (self.meta_blkaddr as u64) * (EROFS_BLKSIZ as u64) + (nid as u64) * 32
120 }
121
122 fn read_inode(&mut self, nid: u32) -> io::Result<InodeInfo> {
123 let offset = self.inode_offset(nid);
124
125 let mut buf = [0u8; EROFS_INODE_EXTENDED_SIZE as usize];
126 read_exact_at(&self.file, offset, &mut buf)?;
127
128 let i_format = u16::from_le_bytes([buf[0], buf[1]]);
129 let i_xattr_icount = u16::from_le_bytes([buf[2], buf[3]]);
130 let mode = u16::from_le_bytes([buf[4], buf[5]]);
131 let size = u64::from_le_bytes([
132 buf[8], buf[9], buf[10], buf[11], buf[12], buf[13], buf[14], buf[15],
133 ]);
134 let i_u = u32::from_le_bytes([buf[16], buf[17], buf[18], buf[19]]);
135
136 let data_layout = ((i_format >> 1) & 0x07) as u8;
137
138 let xattr_ibody_size = if i_xattr_icount == 0 {
142 0u32
143 } else {
144 12 + ((i_xattr_icount as u32) - 1) * 4
145 };
146
147 Ok(InodeInfo {
148 nid,
149 mode,
150 size,
151 data_layout,
152 startblk_lo: i_u,
153 rdev: i_u,
154 xattr_ibody_size,
155 })
156 }
157
158 fn lookup_path(&mut self, path: &str) -> io::Result<InodeInfo> {
159 let components: Vec<&str> = path
160 .trim_start_matches('/')
161 .split('/')
162 .filter(|c| !c.is_empty())
163 .collect();
164
165 if components.is_empty() {
166 if path == "/" {
167 return self.read_inode(self.root_nid);
168 }
169 return Err(io::Error::new(io::ErrorKind::InvalidInput, "empty path"));
170 }
171
172 let mut current_nid = self.root_nid;
173 for (i, component) in components.iter().enumerate() {
174 let inode = self.read_inode(current_nid)?;
175 let mode_type = inode.mode & S_IFMT;
176
177 if mode_type != S_IFDIR {
178 return Err(io::Error::new(
179 io::ErrorKind::NotFound,
180 format!("not a directory at component '{component}'"),
181 ));
182 }
183
184 let target_nid = self.lookup_in_dir(&inode, component)?;
185 if i + 1 == components.len() {
186 return self.read_inode(target_nid);
187 }
188
189 current_nid = target_nid;
190 }
191
192 Err(io::Error::new(io::ErrorKind::NotFound, "path not found"))
193 }
194
195 fn lookup_in_dir(&mut self, dir_inode: &InodeInfo, name: &str) -> io::Result<u32> {
204 let dir_data = self.read_inode_data(dir_inode)?;
205 let blksiz = EROFS_BLKSIZ as usize;
206 let target = name.as_bytes();
207 let block_count = dir_data.len().div_ceil(blksiz);
208 let mut left = 0usize;
209 let mut right = block_count;
210
211 while left < right {
212 let mid = (left + right) / 2;
213 let block = dir_block(&dir_data, mid, blksiz);
214 let dirent_count = dir_block_dirent_count(block)?;
215 let first_name = dirent_name(block, 0, dirent_count)?;
216 let last_name = dirent_name(block, dirent_count - 1, dirent_count)?;
217
218 if target < first_name {
219 right = mid;
220 continue;
221 }
222
223 if target > last_name {
224 left = mid + 1;
225 continue;
226 }
227
228 return lookup_in_dir_block(block, dirent_count, target)?.ok_or_else(|| {
229 io::Error::new(
230 io::ErrorKind::NotFound,
231 format!("entry '{name}' not found in directory"),
232 )
233 });
234 }
235
236 Err(io::Error::new(
237 io::ErrorKind::NotFound,
238 format!("entry '{name}' not found in directory"),
239 ))
240 }
241
242 fn read_inode_data(&mut self, inode: &InodeInfo) -> io::Result<Vec<u8>> {
243 let size = inode.size as usize;
244 if size == 0 {
245 return Ok(Vec::new());
246 }
247
248 let blksiz = EROFS_BLKSIZ as usize;
249
250 match inode.data_layout {
251 EROFS_INODE_FLAT_PLAIN => {
252 if inode.startblk_lo == EROFS_NULL_ADDR {
253 return Ok(Vec::new());
254 }
255 let data_offset = (inode.startblk_lo as u64) * (EROFS_BLKSIZ as u64);
256 let mut data = vec![0u8; size];
257 read_exact_at(&self.file, data_offset, &mut data)?;
258 Ok(data)
259 }
260 EROFS_INODE_FLAT_INLINE => {
261 let full_blocks = size / blksiz;
262 let tail_size = size % blksiz;
263 let mut data = Vec::with_capacity(size);
264
265 if full_blocks > 0 && inode.startblk_lo != EROFS_NULL_ADDR {
267 let data_offset = (inode.startblk_lo as u64) * (EROFS_BLKSIZ as u64);
268 let mut block_data = vec![0u8; full_blocks * blksiz];
269 read_exact_at(&self.file, data_offset, &mut block_data)?;
270 data.extend_from_slice(&block_data);
271 }
272
273 if tail_size > 0 {
275 let inline_offset = self.inode_offset(inode.nid)
276 + EROFS_INODE_EXTENDED_SIZE as u64
277 + inode.xattr_ibody_size as u64;
278 let mut tail = vec![0u8; tail_size];
279 read_exact_at(&self.file, inline_offset, &mut tail)?;
280 data.extend_from_slice(&tail);
281 }
282
283 Ok(data)
284 }
285 _ => Err(io::Error::new(
286 io::ErrorKind::Unsupported,
287 format!("unsupported data layout: {}", inode.data_layout),
288 )),
289 }
290 }
291
292 fn inode_is_opaque(&mut self, inode: &InodeInfo) -> io::Result<bool> {
293 for (name, value) in self.read_inode_xattrs(inode)? {
294 if name == b"trusted.overlay.opaque" && value == b"y" {
295 return Ok(true);
296 }
297 }
298
299 Ok(false)
300 }
301
302 fn read_inode_xattrs(&mut self, inode: &InodeInfo) -> io::Result<Vec<(Vec<u8>, Vec<u8>)>> {
303 if inode.xattr_ibody_size == 0 {
304 return Ok(Vec::new());
305 }
306
307 let total = inode.xattr_ibody_size as usize;
308 if total < EROFS_XATTR_IBODY_HEADER_SIZE as usize {
309 return Err(io::Error::new(
310 io::ErrorKind::InvalidData,
311 "xattr ibody smaller than header",
312 ));
313 }
314
315 let mut offset = self.inode_offset(inode.nid)
316 + EROFS_INODE_EXTENDED_SIZE as u64
317 + EROFS_XATTR_IBODY_HEADER_SIZE as u64;
318 let mut remaining = total - EROFS_XATTR_IBODY_HEADER_SIZE as usize;
319 let mut xattrs = Vec::new();
320
321 while remaining > 0 {
322 if remaining < 4 {
323 return Err(io::Error::new(
324 io::ErrorKind::InvalidData,
325 "truncated xattr entry header",
326 ));
327 }
328
329 let mut entry = [0u8; 4];
330 read_exact_at(&self.file, offset, &mut entry)?;
331
332 let name_len = entry[0] as usize;
333 let name_index = entry[1];
334 let value_len = u16::from_le_bytes([entry[2], entry[3]]) as usize;
335 let entry_size = 4 + name_len + value_len;
336 let aligned_size = erofs_xattr_align(entry_size);
337
338 if aligned_size > remaining {
339 return Err(io::Error::new(
340 io::ErrorKind::InvalidData,
341 "xattr entry exceeds ibody size",
342 ));
343 }
344
345 let mut suffix = vec![0u8; name_len];
346 read_exact_at(&self.file, offset + 4, &mut suffix)?;
347 let mut value = vec![0u8; value_len];
348 read_exact_at(&self.file, offset + 4 + name_len as u64, &mut value)?;
349
350 let name = match name_index {
351 EROFS_XATTR_INDEX_USER => [b"user.".as_slice(), suffix.as_slice()].concat(),
352 EROFS_XATTR_INDEX_TRUSTED => [b"trusted.".as_slice(), suffix.as_slice()].concat(),
353 EROFS_XATTR_INDEX_SECURITY => [b"security.".as_slice(), suffix.as_slice()].concat(),
354 other => {
355 return Err(io::Error::new(
356 io::ErrorKind::InvalidData,
357 format!("unsupported xattr name index: {other}"),
358 ));
359 }
360 };
361
362 xattrs.push((name, value));
363 offset += aligned_size as u64;
364 remaining -= aligned_size;
365 }
366
367 Ok(xattrs)
368 }
369}
370
/// Decoded subset of an on-disk inode, as produced by `ErofsReader::read_inode`.
struct InodeInfo {
    /// Inode number this record was read from; needed again to locate the
    /// xattr area and any inline tail data that follow the inode header.
    nid: u32,
    /// Raw mode word (file-type bits and permissions), from header bytes 4..6.
    mode: u16,
    /// Size of the inode's data in bytes, from header bytes 8..16.
    size: u64,
    /// Data layout selector: bits 1..=3 of the `i_format` word.
    data_layout: u8,
    /// The 32-bit `i_u` header word (bytes 16..20) read as a start block address.
    startblk_lo: u32,
    /// The same `i_u` word read as a device number; which view is meaningful
    /// depends on the file type.
    rdev: u32,
    /// Byte length of the xattr area following the inode header (0 if absent).
    xattr_ibody_size: u32,
}
384
/// Fills `buf` completely with bytes read from `file` starting at `offset`,
/// without touching the file's cursor.
///
/// Delegates to the standard library's positional `read_exact_at`, which
/// retries reads interrupted by a signal instead of surfacing them as errors.
/// An empty `buf` succeeds without performing any read.
///
/// # Errors
///
/// Returns `UnexpectedEof` with the message `unexpected EOF` when the file
/// ends before `buf` is full; any other I/O error is passed through unchanged.
fn read_exact_at(file: &File, offset: u64, buf: &mut [u8]) -> io::Result<()> {
    file.read_exact_at(buf, offset).map_err(|err| match err.kind() {
        // Keep the error text this module has always reported for short reads.
        io::ErrorKind::UnexpectedEof => {
            io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected EOF")
        }
        _ => err,
    })
}
405
/// Returns block number `block_idx` of `dir_data`, where blocks are `blksiz`
/// bytes long and the final block may be shorter.
///
/// Panics if the block starts beyond the end of `dir_data`.
fn dir_block(dir_data: &[u8], block_idx: usize, blksiz: usize) -> &[u8] {
    let rest = &dir_data[block_idx * blksiz..];
    &rest[..rest.len().min(blksiz)]
}
411
412fn dir_block_dirent_count(block: &[u8]) -> io::Result<usize> {
413 if block.len() < EROFS_DIRENT_SIZE as usize {
414 return Err(io::Error::new(
415 io::ErrorKind::InvalidData,
416 "directory block smaller than one dirent",
417 ));
418 }
419
420 let first_nameoff = u16::from_le_bytes([block[8], block[9]]) as usize;
421 let dirent_size = EROFS_DIRENT_SIZE as usize;
422 if first_nameoff < dirent_size
423 || !first_nameoff.is_multiple_of(dirent_size)
424 || first_nameoff > block.len()
425 {
426 return Err(io::Error::new(
427 io::ErrorKind::InvalidData,
428 "invalid first dirent name offset",
429 ));
430 }
431
432 Ok(first_nameoff / dirent_size)
433}
434
435fn dirent_name(block: &[u8], idx: usize, dirent_count: usize) -> io::Result<&[u8]> {
436 let dirent_size = EROFS_DIRENT_SIZE as usize;
437 let dirent_off = idx
438 .checked_mul(dirent_size)
439 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "dirent offset overflow"))?;
440
441 if idx >= dirent_count || dirent_off + dirent_size > block.len() {
442 return Err(io::Error::new(
443 io::ErrorKind::InvalidData,
444 "dirent index out of bounds",
445 ));
446 }
447
448 let nameoff = u16::from_le_bytes([block[dirent_off + 8], block[dirent_off + 9]]) as usize;
449 let mut name_end = if idx + 1 < dirent_count {
450 let next_off = dirent_off + dirent_size;
451 u16::from_le_bytes([block[next_off + 8], block[next_off + 9]]) as usize
452 } else {
453 block.len()
454 };
455
456 if nameoff > name_end || name_end > block.len() {
457 return Err(io::Error::new(
458 io::ErrorKind::InvalidData,
459 "dirent name range out of bounds",
460 ));
461 }
462
463 while name_end > nameoff && block[name_end - 1] == 0 {
464 name_end -= 1;
465 }
466
467 Ok(&block[nameoff..name_end])
468}
469
470fn dirent_nid(block: &[u8], idx: usize) -> io::Result<u32> {
471 let dirent_size = EROFS_DIRENT_SIZE as usize;
472 let dirent_off = idx
473 .checked_mul(dirent_size)
474 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "dirent offset overflow"))?;
475 if dirent_off + dirent_size > block.len() {
476 return Err(io::Error::new(
477 io::ErrorKind::InvalidData,
478 "dirent NID out of bounds",
479 ));
480 }
481
482 let nid = u64::from_le_bytes([
483 block[dirent_off],
484 block[dirent_off + 1],
485 block[dirent_off + 2],
486 block[dirent_off + 3],
487 block[dirent_off + 4],
488 block[dirent_off + 5],
489 block[dirent_off + 6],
490 block[dirent_off + 7],
491 ]);
492 u32::try_from(nid)
493 .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "dirent NID overflow"))
494}
495
496fn lookup_in_dir_block(
497 block: &[u8],
498 dirent_count: usize,
499 target: &[u8],
500) -> io::Result<Option<u32>> {
501 let mut left = 0usize;
502 let mut right = dirent_count;
503
504 while left < right {
505 let mid = (left + right) / 2;
506 match target.cmp(dirent_name(block, mid, dirent_count)?) {
507 std::cmp::Ordering::Less => right = mid,
508 std::cmp::Ordering::Greater => left = mid + 1,
509 std::cmp::Ordering::Equal => return dirent_nid(block, mid).map(Some),
510 }
511 }
512
513 Ok(None)
514}
515
516fn inode_kind(inode: &InodeInfo) -> io::Result<ErofsEntryKind> {
517 match inode.mode & S_IFMT {
518 S_IFREG => Ok(ErofsEntryKind::RegularFile),
519 S_IFDIR => Ok(ErofsEntryKind::Directory),
520 S_IFLNK => Ok(ErofsEntryKind::Symlink),
521 S_IFCHR => Ok(ErofsEntryKind::CharDevice),
522 S_IFBLK => Ok(ErofsEntryKind::BlockDevice),
523 S_IFIFO => Ok(ErofsEntryKind::Fifo),
524 S_IFSOCK => Ok(ErofsEntryKind::Socket),
525 other => Err(io::Error::new(
526 io::ErrorKind::InvalidData,
527 format!("unsupported inode mode type: {other:#o}"),
528 )),
529 }
530}
531
532pub fn read_file_from_erofs(image_path: &Path, file_path: &str) -> io::Result<Vec<u8>> {
534 let file = std::fs::File::open(image_path)?;
535 let mut reader = ErofsReader::new(file)?;
536 reader.read_file(file_path)
537}
538
539pub fn entry_info_from_erofs(image_path: &Path, file_path: &str) -> io::Result<ErofsEntryInfo> {
540 let file = std::fs::File::open(image_path)?;
541 let mut reader = ErofsReader::new(file)?;
542 reader.entry_info(file_path)
543}
544
#[cfg(test)]
mod tests {
    use std::{fs::File, io};

    use tempfile::tempdir;

    use super::ErofsReader;
    use crate::{
        erofs::write_erofs,
        filetree::{FileData, FileTree, InodeMetadata, RegularFileNode, TreeNode},
    };

    /// Builds a regular-file tree node holding `data` in memory, with default
    /// metadata, no xattrs and a link count of one.
    fn make_regular_file(data: &[u8]) -> TreeNode {
        let node = RegularFileNode {
            metadata: InodeMetadata::default(),
            xattrs: Vec::new(),
            data: FileData::Memory(data.to_vec()),
            nlink: 1,
        };
        TreeNode::RegularFile(node)
    }

    /// Writes an image whose `dir` holds 5000 files (per the test name, enough
    /// to span several directory blocks), then checks that the first, a
    /// middle and the last entry resolve and that a missing name reports
    /// `NotFound`.
    #[test]
    fn lookup_path_resolves_large_multi_block_directory() {
        let mut tree = FileTree::new();
        for i in 0..5000 {
            tree.insert(
                format!("dir/file-{i:04}.txt").as_bytes(),
                make_regular_file(b"x"),
            )
            .expect("insert file");
        }

        let output_dir = tempdir().expect("tempdir");
        let image_path = output_dir.path().join("large-dir.erofs");
        write_erofs(&tree, &image_path).expect("write erofs");

        let mut reader =
            ErofsReader::new(File::open(&image_path).expect("open erofs")).expect("reader");

        let present = [
            ("/dir/file-0000.txt", "first"),
            ("/dir/file-2500.txt", "middle"),
            ("/dir/file-4999.txt", "last"),
        ];
        for (path, label) in present {
            assert_eq!(reader.read_file(path).expect(label), b"x");
        }

        let missing = reader
            .entry_info("/dir/file-9999.txt")
            .expect_err("missing entry should fail");
        assert_eq!(missing.kind(), io::ErrorKind::NotFound);
    }
}