1use std::os::unix::fs::FileExt;
10use std::path::Path;
11use std::{fs::File, io};
12
13use super::format::{
14 EROFS_BLKSIZ, EROFS_DIRENT_SIZE, EROFS_INODE_EXTENDED_SIZE, EROFS_INODE_FLAT_INLINE,
15 EROFS_INODE_FLAT_PLAIN, EROFS_NULL_ADDR, EROFS_SUPER_OFFSET, EROFS_XATTR_IBODY_HEADER_SIZE,
16 EROFS_XATTR_INDEX_SECURITY, EROFS_XATTR_INDEX_TRUSTED, EROFS_XATTR_INDEX_USER, S_IFBLK,
17 S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG, S_IFSOCK, erofs_xattr_align,
18};
19
/// Read-only accessor for an EROFS image backed by an open [`File`].
///
/// Only the superblock fields needed to locate inodes are retained; every
/// lookup re-reads the image with positional reads.
pub struct ErofsReader {
    /// Open handle to the image; all reads go through `read_exact_at`.
    file: File,
    /// `meta_blkaddr` from the superblock: block address of the metadata area
    /// that inode offsets are computed against.
    meta_blkaddr: u32,
    /// NID of the root directory inode, taken from the superblock.
    root_nid: u32,
}
30
/// File type of an entry, derived from the `S_IFMT` bits of the inode mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErofsEntryKind {
    /// Mode type `S_IFREG`.
    RegularFile,
    /// Mode type `S_IFDIR`.
    Directory,
    /// Mode type `S_IFLNK`.
    Symlink,
    /// Mode type `S_IFCHR`.
    CharDevice,
    /// Mode type `S_IFBLK`.
    BlockDevice,
    /// Mode type `S_IFIFO`.
    Fifo,
    /// Mode type `S_IFSOCK`.
    Socket,
}
41
/// Summary of a single path inside an EROFS image, as returned by
/// [`ErofsReader::entry_info`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ErofsEntryInfo {
    /// File type of the entry.
    pub kind: ErofsEntryKind,
    /// `true` when the entry is a directory carrying the inline xattr
    /// `trusted.overlay.opaque` with value `y`; always `false` for other kinds.
    pub opaque: bool,
    /// `true` when the entry is a character device whose `rdev` is 0.
    pub whiteout: bool,
}
48
/// Raw inode fields exposed to tests (see `ErofsReader::inode_debug_info`).
#[cfg(test)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct ErofsInodeDebugInfo {
    /// NID the looked-up path resolved to.
    pub nid: u32,
    /// Link count read from the on-disk inode.
    pub nlink: u32,
    /// File size in bytes read from the on-disk inode.
    pub size: u64,
    /// Data layout value decoded from bits 1..=3 of `i_format`.
    pub data_layout: u8,
}
57
58impl ErofsReader {
63 pub fn new(file: File) -> io::Result<Self> {
65 let mut sb = [0u8; 128];
66 read_exact_at(&file, EROFS_SUPER_OFFSET, &mut sb)?;
67
68 let magic = u32::from_le_bytes([sb[0], sb[1], sb[2], sb[3]]);
69 if magic != 0xE0F5_E1E2 {
70 return Err(io::Error::new(
71 io::ErrorKind::InvalidData,
72 format!("bad EROFS magic: {magic:#x}"),
73 ));
74 }
75
76 let root_nid = u16::from_le_bytes([sb[0x0E], sb[0x0F]]) as u32;
77 let meta_blkaddr = u32::from_le_bytes([sb[0x28], sb[0x29], sb[0x2A], sb[0x2B]]);
78
79 Ok(Self {
80 file,
81 meta_blkaddr,
82 root_nid,
83 })
84 }
85
86 pub fn read_file(&mut self, path: &str) -> io::Result<Vec<u8>> {
88 let target_inode = self.lookup_path(path)?;
89 if (target_inode.mode & S_IFMT) != S_IFREG {
90 return Err(io::Error::new(
91 io::ErrorKind::InvalidInput,
92 "target is not a regular file",
93 ));
94 }
95 self.read_inode_data(&target_inode)
96 }
97
98 pub fn read_link(&mut self, path: &str) -> io::Result<Vec<u8>> {
100 let target_inode = self.lookup_path(path)?;
101 if (target_inode.mode & S_IFMT) != S_IFLNK {
102 return Err(io::Error::new(
103 io::ErrorKind::InvalidInput,
104 "target is not a symlink",
105 ));
106 }
107 self.read_inode_data(&target_inode)
108 }
109
110 pub fn entry_info(&mut self, path: &str) -> io::Result<ErofsEntryInfo> {
111 let inode = self.lookup_path(path)?;
112 let kind = inode_kind(&inode)?;
113 let opaque = if kind == ErofsEntryKind::Directory {
114 self.inode_is_opaque(&inode)?
115 } else {
116 false
117 };
118 let whiteout = kind == ErofsEntryKind::CharDevice && inode.rdev == 0;
119
120 Ok(ErofsEntryInfo {
121 kind,
122 opaque,
123 whiteout,
124 })
125 }
126
127 #[cfg(test)]
128 pub(crate) fn inode_debug_info(&mut self, path: &str) -> io::Result<ErofsInodeDebugInfo> {
129 let inode = self.lookup_path(path)?;
130 Ok(ErofsInodeDebugInfo {
131 nid: inode.nid,
132 nlink: inode.nlink,
133 size: inode.size,
134 data_layout: inode.data_layout,
135 })
136 }
137
138 fn inode_offset(&self, nid: u32) -> u64 {
139 (self.meta_blkaddr as u64) * (EROFS_BLKSIZ as u64) + (nid as u64) * 32
140 }
141
142 fn read_inode(&mut self, nid: u32) -> io::Result<InodeInfo> {
143 let offset = self.inode_offset(nid);
144
145 let mut buf = [0u8; EROFS_INODE_EXTENDED_SIZE as usize];
146 read_exact_at(&self.file, offset, &mut buf)?;
147
148 let i_format = u16::from_le_bytes([buf[0], buf[1]]);
149 let i_xattr_icount = u16::from_le_bytes([buf[2], buf[3]]);
150 let mode = u16::from_le_bytes([buf[4], buf[5]]);
151 let size = u64::from_le_bytes([
152 buf[8], buf[9], buf[10], buf[11], buf[12], buf[13], buf[14], buf[15],
153 ]);
154 let i_u = u32::from_le_bytes([buf[16], buf[17], buf[18], buf[19]]);
155 #[cfg(test)]
156 let nlink = u32::from_le_bytes([buf[44], buf[45], buf[46], buf[47]]);
157
158 let data_layout = ((i_format >> 1) & 0x07) as u8;
159
160 let xattr_ibody_size = if i_xattr_icount == 0 {
164 0u32
165 } else {
166 12 + ((i_xattr_icount as u32) - 1) * 4
167 };
168
169 Ok(InodeInfo {
170 nid,
171 mode,
172 size,
173 #[cfg(test)]
174 nlink,
175 data_layout,
176 startblk_lo: i_u,
177 rdev: i_u,
178 xattr_ibody_size,
179 })
180 }
181
182 fn lookup_path(&mut self, path: &str) -> io::Result<InodeInfo> {
183 let components: Vec<&str> = path
184 .trim_start_matches('/')
185 .split('/')
186 .filter(|c| !c.is_empty())
187 .collect();
188
189 if components.is_empty() {
190 if path == "/" {
191 return self.read_inode(self.root_nid);
192 }
193 return Err(io::Error::new(io::ErrorKind::InvalidInput, "empty path"));
194 }
195
196 let mut current_nid = self.root_nid;
197 for (i, component) in components.iter().enumerate() {
198 let inode = self.read_inode(current_nid)?;
199 let mode_type = inode.mode & S_IFMT;
200
201 if mode_type != S_IFDIR {
202 return Err(io::Error::new(
203 io::ErrorKind::NotFound,
204 format!("not a directory at component '{component}'"),
205 ));
206 }
207
208 let target_nid = self.lookup_in_dir(&inode, component)?;
209 if i + 1 == components.len() {
210 return self.read_inode(target_nid);
211 }
212
213 current_nid = target_nid;
214 }
215
216 Err(io::Error::new(io::ErrorKind::NotFound, "path not found"))
217 }
218
219 fn lookup_in_dir(&mut self, dir_inode: &InodeInfo, name: &str) -> io::Result<u32> {
228 let dir_data = self.read_inode_data(dir_inode)?;
229 let blksiz = EROFS_BLKSIZ as usize;
230 let target = name.as_bytes();
231 let block_count = dir_data.len().div_ceil(blksiz);
232 let mut left = 0usize;
233 let mut right = block_count;
234
235 while left < right {
236 let mid = (left + right) / 2;
237 let block = dir_block(&dir_data, mid, blksiz);
238 let dirent_count = dir_block_dirent_count(block)?;
239 let first_name = dirent_name(block, 0, dirent_count)?;
240 let last_name = dirent_name(block, dirent_count - 1, dirent_count)?;
241
242 if target < first_name {
243 right = mid;
244 continue;
245 }
246
247 if target > last_name {
248 left = mid + 1;
249 continue;
250 }
251
252 return lookup_in_dir_block(block, dirent_count, target)?.ok_or_else(|| {
253 io::Error::new(
254 io::ErrorKind::NotFound,
255 format!("entry '{name}' not found in directory"),
256 )
257 });
258 }
259
260 Err(io::Error::new(
261 io::ErrorKind::NotFound,
262 format!("entry '{name}' not found in directory"),
263 ))
264 }
265
266 fn read_inode_data(&mut self, inode: &InodeInfo) -> io::Result<Vec<u8>> {
267 let size = inode.size as usize;
268 if size == 0 {
269 return Ok(Vec::new());
270 }
271
272 let blksiz = EROFS_BLKSIZ as usize;
273
274 match inode.data_layout {
275 EROFS_INODE_FLAT_PLAIN => {
276 if inode.startblk_lo == EROFS_NULL_ADDR {
277 return Ok(Vec::new());
278 }
279 let data_offset = (inode.startblk_lo as u64) * (EROFS_BLKSIZ as u64);
280 let mut data = vec![0u8; size];
281 read_exact_at(&self.file, data_offset, &mut data)?;
282 Ok(data)
283 }
284 EROFS_INODE_FLAT_INLINE => {
285 let full_blocks = size / blksiz;
286 let tail_size = size % blksiz;
287 let mut data = Vec::with_capacity(size);
288
289 if full_blocks > 0 && inode.startblk_lo != EROFS_NULL_ADDR {
291 let data_offset = (inode.startblk_lo as u64) * (EROFS_BLKSIZ as u64);
292 let mut block_data = vec![0u8; full_blocks * blksiz];
293 read_exact_at(&self.file, data_offset, &mut block_data)?;
294 data.extend_from_slice(&block_data);
295 }
296
297 if tail_size > 0 {
299 let inline_offset = self.inode_offset(inode.nid)
300 + EROFS_INODE_EXTENDED_SIZE as u64
301 + inode.xattr_ibody_size as u64;
302 let mut tail = vec![0u8; tail_size];
303 read_exact_at(&self.file, inline_offset, &mut tail)?;
304 data.extend_from_slice(&tail);
305 }
306
307 Ok(data)
308 }
309 _ => Err(io::Error::new(
310 io::ErrorKind::Unsupported,
311 format!("unsupported data layout: {}", inode.data_layout),
312 )),
313 }
314 }
315
316 fn inode_is_opaque(&mut self, inode: &InodeInfo) -> io::Result<bool> {
317 for (name, value) in self.read_inode_xattrs(inode)? {
318 if name == b"trusted.overlay.opaque" && value == b"y" {
319 return Ok(true);
320 }
321 }
322
323 Ok(false)
324 }
325
326 fn read_inode_xattrs(&mut self, inode: &InodeInfo) -> io::Result<Vec<(Vec<u8>, Vec<u8>)>> {
327 if inode.xattr_ibody_size == 0 {
328 return Ok(Vec::new());
329 }
330
331 let total = inode.xattr_ibody_size as usize;
332 if total < EROFS_XATTR_IBODY_HEADER_SIZE as usize {
333 return Err(io::Error::new(
334 io::ErrorKind::InvalidData,
335 "xattr ibody smaller than header",
336 ));
337 }
338
339 let mut offset = self.inode_offset(inode.nid)
340 + EROFS_INODE_EXTENDED_SIZE as u64
341 + EROFS_XATTR_IBODY_HEADER_SIZE as u64;
342 let mut remaining = total - EROFS_XATTR_IBODY_HEADER_SIZE as usize;
343 let mut xattrs = Vec::new();
344
345 while remaining > 0 {
346 if remaining < 4 {
347 return Err(io::Error::new(
348 io::ErrorKind::InvalidData,
349 "truncated xattr entry header",
350 ));
351 }
352
353 let mut entry = [0u8; 4];
354 read_exact_at(&self.file, offset, &mut entry)?;
355
356 let name_len = entry[0] as usize;
357 let name_index = entry[1];
358 let value_len = u16::from_le_bytes([entry[2], entry[3]]) as usize;
359 let entry_size = 4 + name_len + value_len;
360 let aligned_size = erofs_xattr_align(entry_size);
361
362 if aligned_size > remaining {
363 return Err(io::Error::new(
364 io::ErrorKind::InvalidData,
365 "xattr entry exceeds ibody size",
366 ));
367 }
368
369 let mut suffix = vec![0u8; name_len];
370 read_exact_at(&self.file, offset + 4, &mut suffix)?;
371 let mut value = vec![0u8; value_len];
372 read_exact_at(&self.file, offset + 4 + name_len as u64, &mut value)?;
373
374 let name = match name_index {
375 EROFS_XATTR_INDEX_USER => [b"user.".as_slice(), suffix.as_slice()].concat(),
376 EROFS_XATTR_INDEX_TRUSTED => [b"trusted.".as_slice(), suffix.as_slice()].concat(),
377 EROFS_XATTR_INDEX_SECURITY => [b"security.".as_slice(), suffix.as_slice()].concat(),
378 other => {
379 return Err(io::Error::new(
380 io::ErrorKind::InvalidData,
381 format!("unsupported xattr name index: {other}"),
382 ));
383 }
384 };
385
386 xattrs.push((name, value));
387 offset += aligned_size as u64;
388 remaining -= aligned_size;
389 }
390
391 Ok(xattrs)
392 }
393}
394
/// Decoded subset of an on-disk inode, as produced by `ErofsReader::read_inode`.
struct InodeInfo {
    /// NID this inode was read from.
    nid: u32,
    /// Raw mode bits; the `S_IFMT` part selects the file type.
    mode: u16,
    /// Data size in bytes.
    size: u64,
    /// Link count (only decoded in test builds).
    #[cfg(test)]
    nlink: u32,
    /// Data layout value taken from bits 1..=3 of `i_format`.
    data_layout: u8,
    /// Raw `i_u` value interpreted as the starting block address of the data.
    startblk_lo: u32,
    /// The same raw `i_u` value interpreted as a device number.
    rdev: u32,
    /// Size in bytes of the inline xattr area following the inode (0 if none).
    xattr_ibody_size: u32,
}
410
/// Fills `buf` completely with bytes read from `file` starting at `offset`.
///
/// Short reads are continued until the buffer is full, and reads that fail
/// with [`io::ErrorKind::Interrupted`] are retried rather than reported.
///
/// # Errors
///
/// Returns [`io::ErrorKind::UnexpectedEof`] when the file ends before `buf`
/// is full, or the first non-interrupt error from the underlying read.
fn read_exact_at(file: &File, offset: u64, mut buf: &mut [u8]) -> io::Result<()> {
    let mut current_offset = offset;
    while !buf.is_empty() {
        match file.read_at(buf, current_offset) {
            Ok(0) => {
                return Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "unexpected EOF",
                ));
            }
            Ok(read) => {
                current_offset += read as u64;
                buf = &mut buf[read..];
            }
            // A signal interrupted the call before any data moved; try again.
            Err(err) if err.kind() == io::ErrorKind::Interrupted => {}
            Err(err) => return Err(err),
        }
    }

    Ok(())
}
431
/// Returns the `block_idx`-th `blksiz`-sized slice of `dir_data`.
///
/// The final block may be shorter than `blksiz` when the data length is not a
/// multiple of the block size. Panics if the block starts past the end of
/// `dir_data`.
fn dir_block(dir_data: &[u8], block_idx: usize, blksiz: usize) -> &[u8] {
    let from_block = &dir_data[block_idx * blksiz..];
    let block_len = from_block.len().min(blksiz);
    &from_block[..block_len]
}
437
438fn dir_block_dirent_count(block: &[u8]) -> io::Result<usize> {
439 if block.len() < EROFS_DIRENT_SIZE as usize {
440 return Err(io::Error::new(
441 io::ErrorKind::InvalidData,
442 "directory block smaller than one dirent",
443 ));
444 }
445
446 let first_nameoff = u16::from_le_bytes([block[8], block[9]]) as usize;
447 let dirent_size = EROFS_DIRENT_SIZE as usize;
448 if first_nameoff < dirent_size
449 || !first_nameoff.is_multiple_of(dirent_size)
450 || first_nameoff > block.len()
451 {
452 return Err(io::Error::new(
453 io::ErrorKind::InvalidData,
454 "invalid first dirent name offset",
455 ));
456 }
457
458 Ok(first_nameoff / dirent_size)
459}
460
461fn dirent_name(block: &[u8], idx: usize, dirent_count: usize) -> io::Result<&[u8]> {
462 let dirent_size = EROFS_DIRENT_SIZE as usize;
463 let dirent_off = idx
464 .checked_mul(dirent_size)
465 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "dirent offset overflow"))?;
466
467 if idx >= dirent_count || dirent_off + dirent_size > block.len() {
468 return Err(io::Error::new(
469 io::ErrorKind::InvalidData,
470 "dirent index out of bounds",
471 ));
472 }
473
474 let nameoff = u16::from_le_bytes([block[dirent_off + 8], block[dirent_off + 9]]) as usize;
475 let mut name_end = if idx + 1 < dirent_count {
476 let next_off = dirent_off + dirent_size;
477 u16::from_le_bytes([block[next_off + 8], block[next_off + 9]]) as usize
478 } else {
479 block.len()
480 };
481
482 if nameoff > name_end || name_end > block.len() {
483 return Err(io::Error::new(
484 io::ErrorKind::InvalidData,
485 "dirent name range out of bounds",
486 ));
487 }
488
489 while name_end > nameoff && block[name_end - 1] == 0 {
490 name_end -= 1;
491 }
492
493 Ok(&block[nameoff..name_end])
494}
495
496fn dirent_nid(block: &[u8], idx: usize) -> io::Result<u32> {
497 let dirent_size = EROFS_DIRENT_SIZE as usize;
498 let dirent_off = idx
499 .checked_mul(dirent_size)
500 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "dirent offset overflow"))?;
501 if dirent_off + dirent_size > block.len() {
502 return Err(io::Error::new(
503 io::ErrorKind::InvalidData,
504 "dirent NID out of bounds",
505 ));
506 }
507
508 let nid = u64::from_le_bytes([
509 block[dirent_off],
510 block[dirent_off + 1],
511 block[dirent_off + 2],
512 block[dirent_off + 3],
513 block[dirent_off + 4],
514 block[dirent_off + 5],
515 block[dirent_off + 6],
516 block[dirent_off + 7],
517 ]);
518 u32::try_from(nid)
519 .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "dirent NID overflow"))
520}
521
522fn lookup_in_dir_block(
523 block: &[u8],
524 dirent_count: usize,
525 target: &[u8],
526) -> io::Result<Option<u32>> {
527 let mut left = 0usize;
528 let mut right = dirent_count;
529
530 while left < right {
531 let mid = (left + right) / 2;
532 match target.cmp(dirent_name(block, mid, dirent_count)?) {
533 std::cmp::Ordering::Less => right = mid,
534 std::cmp::Ordering::Greater => left = mid + 1,
535 std::cmp::Ordering::Equal => return dirent_nid(block, mid).map(Some),
536 }
537 }
538
539 Ok(None)
540}
541
542fn inode_kind(inode: &InodeInfo) -> io::Result<ErofsEntryKind> {
543 match inode.mode & S_IFMT {
544 S_IFREG => Ok(ErofsEntryKind::RegularFile),
545 S_IFDIR => Ok(ErofsEntryKind::Directory),
546 S_IFLNK => Ok(ErofsEntryKind::Symlink),
547 S_IFCHR => Ok(ErofsEntryKind::CharDevice),
548 S_IFBLK => Ok(ErofsEntryKind::BlockDevice),
549 S_IFIFO => Ok(ErofsEntryKind::Fifo),
550 S_IFSOCK => Ok(ErofsEntryKind::Socket),
551 other => Err(io::Error::new(
552 io::ErrorKind::InvalidData,
553 format!("unsupported inode mode type: {other:#o}"),
554 )),
555 }
556}
557
558pub fn read_file_from_erofs(image_path: &Path, file_path: &str) -> io::Result<Vec<u8>> {
560 let file = std::fs::File::open(image_path)?;
561 let mut reader = ErofsReader::new(file)?;
562 reader.read_file(file_path)
563}
564
565pub fn entry_info_from_erofs(image_path: &Path, file_path: &str) -> io::Result<ErofsEntryInfo> {
566 let file = std::fs::File::open(image_path)?;
567 let mut reader = ErofsReader::new(file)?;
568 reader.entry_info(file_path)
569}
570
/// Round-trip tests: images are produced with `write_erofs` and read back
/// through [`ErofsReader`].
#[cfg(test)]
mod tests {
    use std::{fs::File, io, path::PathBuf};

    use tempfile::tempdir;

    use super::ErofsReader;
    use crate::{
        erofs::write_erofs,
        tree::{FileData, FileTree, InodeMetadata, RegularFileId, RegularFileNode, TreeNode},
    };

    /// Builds an in-memory regular file node with a fresh file id.
    fn make_regular_file(data: &[u8]) -> TreeNode {
        make_regular_file_with_id(data, RegularFileId::new())
    }

    /// Builds an in-memory regular file node with default metadata, no xattrs
    /// and a link count of 1, using the caller-supplied file id.
    fn make_regular_file_with_id(data: &[u8], id: RegularFileId) -> TreeNode {
        let node = RegularFileNode {
            id,
            metadata: InodeMetadata::default(),
            xattrs: Vec::new(),
            data: FileData::Memory(data.to_vec()),
            nlink: 1,
        };
        TreeNode::RegularFile(node)
    }

    /// A directory with 5000 entries must be searchable at its first, middle
    /// and last entry, and report `NotFound` for an absent name.
    #[test]
    fn lookup_path_resolves_large_multi_block_directory() {
        let mut tree = FileTree::new();
        for index in 0..5000 {
            let name = format!("dir/file-{index:04}.txt");
            tree.insert(name.as_bytes(), make_regular_file(b"x"))
                .expect("insert file");
        }

        let image_dir = tempdir().expect("tempdir");
        let image_path = image_dir.path().join("large-dir.erofs");
        write_erofs(&tree, &image_path).expect("write erofs");

        let image = File::open(&image_path).expect("open erofs");
        let mut reader = ErofsReader::new(image).expect("reader");

        let probes = [
            ("/dir/file-0000.txt", "first"),
            ("/dir/file-2500.txt", "middle"),
            ("/dir/file-4999.txt", "last"),
        ];
        for (path, label) in probes {
            assert_eq!(reader.read_file(path).expect(label), b"x");
        }

        let missing = reader
            .entry_info("/dir/file-9999.txt")
            .expect_err("missing entry should fail");
        assert_eq!(missing.kind(), io::ErrorKind::NotFound);
    }

    /// Two paths inserted with the same file id must end up as one inode with
    /// `nlink == 2` and a single shared data mapping.
    #[test]
    fn hardlinked_regular_files_share_inode_and_data_blocks() {
        let shared_id = RegularFileId::new();
        let mut tree = FileTree::new();

        tree.insert(b"alpha", make_regular_file_with_id(b"shared", shared_id))
            .expect("insert alpha");
        tree.insert(b"beta", make_regular_file_with_id(b"shared", shared_id))
            .expect("insert beta");

        let image_dir = tempdir().expect("tempdir");
        let image_path = image_dir.path().join("hardlinks.erofs");
        let data_map = write_erofs(&tree, &image_path).expect("write erofs");

        let alpha_blocks = data_map
            .file_blocks
            .get(&PathBuf::from("alpha"))
            .copied()
            .expect("alpha data map");
        let beta_blocks = data_map
            .file_blocks
            .get(&PathBuf::from("beta"))
            .copied()
            .expect("beta data map");
        assert_eq!(alpha_blocks, beta_blocks);

        let image = File::open(&image_path).expect("open erofs");
        let mut reader = ErofsReader::new(image).expect("reader");
        let alpha = reader.inode_debug_info("/alpha").expect("alpha inode");
        let beta = reader.inode_debug_info("/beta").expect("beta inode");

        assert_eq!(alpha.nid, beta.nid);
        assert_eq!(alpha.nlink, 2);
        assert_eq!(beta.nlink, 2);
        assert_eq!(alpha.size, b"shared".len() as u64);
        assert_eq!(reader.read_file("/alpha").expect("read alpha"), b"shared");
        assert_eq!(reader.read_file("/beta").expect("read beta"), b"shared");
    }
}