pub mod build_plan;
pub mod constants;
pub mod dir;
pub mod extent;
pub mod group;
pub mod inode;
pub mod layout;
pub mod superblock;
pub use build_plan::BuildPlan;
use std::io::Read;
use constants::{INO_ROOT_DIR, SUPERBLOCK_OFFSET};
use group::{GroupDesc, set_bit, set_first_n, test_bit};
use inode::{Inode, SpecialKind};
use layout::Layout;
use superblock::Superblock;
use crate::Result;
use crate::block::BlockDevice;
use crate::fs::rootdevs::{RootDevs, device_table};
use crate::fs::{DeviceKind, FileMeta, FileSource};
/// Flavour of ext filesystem handled by this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum FsKind {
    /// Plain ext2; this is the default variant and has no journal.
    #[default]
    Ext2,
    /// ext3; treated as journaled by [`FsKind::has_journal`].
    Ext3,
    /// ext4; treated as journaled by [`FsKind::has_journal`].
    Ext4,
}

impl FsKind {
    /// Returns `true` for the variants that carry a journal (`Ext3`, `Ext4`).
    pub fn has_journal(self) -> bool {
        match self {
            FsKind::Ext2 => false,
            FsKind::Ext3 | FsKind::Ext4 => true,
        }
    }
}
/// Parameters consumed by `Ext::format_with` when creating a new filesystem.
#[derive(Debug, Clone)]
pub struct FormatOpts {
/// Filesystem flavour; decides whether a journal is allocated and whether
/// the extents feature flag is set.
pub kind: FsKind,
/// Block size, passed to `layout::plan`.
pub block_size: u32,
/// Total number of blocks, passed to `layout::plan`.
pub blocks_count: u32,
/// Requested inode count, passed to `layout::plan`.
pub inodes_count: u32,
/// Copied verbatim into the superblock `uuid` field.
pub uuid: [u8; 16],
/// Copied verbatim into the superblock `volume_name` field.
pub volume_label: [u8; 16],
/// Timestamp stored as the superblock mtime/wtime/lastcheck and used for
/// the inodes created during formatting.
pub mtime: u32,
/// Percentage of `blocks_count` recorded as reserved blocks.
pub reserved_blocks_percent: u8,
/// When `true`, a `lost+found` directory is created under the root.
pub create_lost_found: bool,
/// Journal length in blocks for journaled kinds; `0` selects 1024 blocks.
pub journal_blocks: u32,
}
impl Default for FormatOpts {
/// Defaults: ext2, 1024 blocks of 1024 bytes, 16 inodes, zeroed UUID and
/// label, mtime 0, 5% reserved blocks, `lost+found` enabled, and
/// `journal_blocks` left at 0 (meaning "use the built-in journal size" for
/// journaled kinds).
fn default() -> Self {
Self {
kind: FsKind::Ext2,
block_size: 1024,
blocks_count: 1024,
inodes_count: 16,
uuid: [0; 16],
volume_label: [0; 16],
mtime: 0,
reserved_blocks_percent: 5,
create_lost_found: true,
journal_blocks: 0,
}
}
}
/// In-memory state of one block group: both bitmaps plus its descriptor.
#[derive(Debug, Clone)]
struct GroupState {
// One block-sized buffer; a set bit marks the block (relative to the
// group's first block) as in use.
block_bitmap: Vec<u8>,
// One block-sized buffer; a set bit marks the inode (index within the
// group) as in use.
inode_bitmap: Vec<u8>,
// Group descriptor that is encoded into the descriptor table on flush.
desc: GroupDesc,
}
/// An ext2/3/4 filesystem handle: superblock, layout and metadata that is
/// staged in memory until it is written out by `flush`.
#[derive(Debug)]
pub struct Ext {
/// In-memory superblock; written to the device on flush.
pub sb: Superblock,
/// Geometry of the filesystem (block size, per-group positions, ...).
pub layout: Layout,
/// Flavour chosen at format time or detected from feature flags on open.
pub kind: FsKind,
// Per-group bitmaps and descriptors, indexed by group number.
groups: Vec<GroupState>,
// Next inode number `alloc_inode` will try.
next_inode: u32,
// Staged inodes as (inode number, inode); written on flush.
inodes: Vec<(u32, Inode)>,
// Staged blocks as (absolute block number, contents); written on flush.
data_blocks: Vec<(u32, Vec<u8>)>,
}
impl Ext {
/// Creates a fresh filesystem on `dev` according to `opts` and returns a
/// handle to it.
///
/// The covered device range is zeroed first, then the superblock, group
/// bitmaps, root directory, optional `lost+found` and (for journaled kinds)
/// the journal are built in memory and written via `flush_metadata`.
///
/// # Errors
/// Returns `InvalidArgument` when the device is smaller than the planned
/// filesystem, and propagates any layout, allocation or device error.
pub fn format_with(dev: &mut dyn BlockDevice, opts: &FormatOpts) -> Result<Self> {
let layout = layout::plan(opts.block_size, opts.blocks_count, opts.inodes_count)?;
let total_bytes = layout.blocks_count as u64 * layout.block_size as u64;
if dev.total_size() < total_bytes {
return Err(crate::Error::InvalidArgument(format!(
"ext: device has {} bytes, need {total_bytes}",
dev.total_size()
)));
}
// Start from an all-zero image so untouched blocks read back as zeroes.
dev.zero_range(0, total_bytes)?;
let mut sb = Superblock::ext2_default();
sb.blocks_count = layout.blocks_count;
sb.inodes_count = layout.inodes_count;
sb.first_data_block = layout.first_data_block;
// Stored as log2(block_size) - 10, i.e. 0 for 1024-byte blocks.
sb.log_block_size = layout.block_size.trailing_zeros() - 10;
sb.log_frag_size = sb.log_block_size;
sb.blocks_per_group = layout.blocks_per_group;
sb.frags_per_group = layout.blocks_per_group;
sb.inodes_per_group = layout.inodes_per_group;
sb.mtime = opts.mtime;
sb.wtime = opts.mtime;
sb.uuid = opts.uuid;
sb.volume_name = opts.volume_label;
sb.r_blocks_count =
(layout.blocks_count as u64 * opts.reserved_blocks_percent as u64 / 100) as u32;
sb.lastcheck = opts.mtime;
let bs = layout.block_size;
let mut groups = Vec::with_capacity(layout.groups.len());
for g in &layout.groups {
let mut block_bitmap = vec![0u8; bs as usize];
let mut inode_bitmap = vec![0u8; bs as usize];
// Everything in the group before `data_start` is marked as used.
for blk in g.start_block..g.data_start {
set_bit(&mut block_bitmap, blk - g.start_block);
}
let group_blocks = g.end_block - g.start_block + 1;
// Bits past the end of the group do not map to blocks; mark them used.
for bit in group_blocks..(bs * 8) {
set_bit(&mut block_bitmap, bit);
}
// Same padding for inode bits beyond `inodes_per_group`.
for bit in layout.inodes_per_group..(bs * 8) {
set_bit(&mut inode_bitmap, bit);
}
// Free counts are filled in later by `recompute_free_counts`.
let desc = GroupDesc {
block_bitmap: g.block_bitmap,
inode_bitmap: g.inode_bitmap,
inode_table: g.inode_table,
free_blocks_count: 0,
free_inodes_count: 0,
used_dirs_count: 0,
flags: 0,
};
groups.push(GroupState {
block_bitmap,
inode_bitmap,
desc,
});
}
let mut ext = Self {
sb,
layout,
kind: opts.kind,
groups,
next_inode: 0,
inodes: Vec::new(),
data_blocks: Vec::new(),
};
// Inodes below `first_ino` are reserved: mark them used in group 0 and
// start regular allocation at `first_ino`.
let first_ino = ext.sb.first_ino;
set_first_n(&mut ext.groups[0].inode_bitmap, first_ino - 1);
ext.next_inode = first_ino;
ext.create_root(opts.mtime)?;
if opts.create_lost_found {
ext.create_lost_found(dev, opts.mtime)?;
}
if opts.kind.has_journal() {
// A `journal_blocks` of 0 selects the built-in size of 1024 blocks.
let blocks = if opts.journal_blocks == 0 {
1024
} else {
opts.journal_blocks
};
ext.allocate_journal(blocks, opts.mtime)?;
ext.sb.feature_compat |= constants::feature::COMPAT_HAS_JOURNAL;
ext.sb.journal_inum = constants::INO_JOURNAL;
}
if matches!(opts.kind, FsKind::Ext4) {
ext.sb.feature_incompat |= constants::feature::INCOMPAT_EXTENTS;
}
ext.recompute_free_counts();
ext.flush_metadata(dev)?;
Ok(ext)
}
/// Records the data blocks `data` in `inode`, using an extent tree on ext4
/// and the classic direct/indirect pointer scheme otherwise.
///
/// Returns the number of extra metadata blocks that had to be allocated.
fn fill_block_pointers(&mut self, inode: &mut Inode, data: &[u32]) -> Result<u32> {
    match self.kind {
        FsKind::Ext4 => self.fill_block_pointers_extent(inode, data),
        FsKind::Ext2 | FsKind::Ext3 => self.fill_block_pointers_indirect(inode, data),
    }
}
/// Stores `data` as an inline extent tree inside the inode's block array and
/// sets the extents flag on the inode.
///
/// Always reports zero additional metadata blocks.
fn fill_block_pointers_extent(&mut self, inode: &mut Inode, data: &[u32]) -> Result<u32> {
    let packed = extent::pack_into_iblock(&extent::coalesce(data))?;
    // Reinterpret the packed bytes as consecutive little-endian words, one
    // per slot of the inode's block array.
    for (slot, start) in inode.block.iter_mut().zip((0usize..).step_by(4)) {
        let word: [u8; 4] = packed[start..start + 4].try_into().unwrap();
        *slot = u32::from_le_bytes(word);
    }
    inode.flags |= constants::EXT4_EXTENTS_FL;
    Ok(0)
}
/// Records `data` in `inode` using direct, single-indirect and
/// double-indirect block pointers.
///
/// Pointer blocks are allocated on demand and staged in `self.data_blocks`.
/// Returns how many such pointer blocks were allocated.
///
/// # Errors
/// Returns `Unsupported` when the file needs more than direct + single +
/// double indirection, and propagates block allocation failures.
fn fill_block_pointers_indirect(&mut self, inode: &mut Inode, data: &[u32]) -> Result<u32> {
let bs = self.layout.block_size;
// Each pointer is 4 bytes, so one block holds `bs / 4` of them.
let ptrs_per_block = (bs / 4) as usize;
let n = data.len();
// Direct pointers live in the inode itself.
let n_direct = constants::N_DIRECT.min(n);
inode.block[..n_direct].copy_from_slice(&data[..n_direct]);
let mut allocated_meta = 0u32;
let mut consumed = n_direct;
// Single-indirect block: one block full of little-endian pointers.
if consumed < n {
let ind = self.alloc_data_block()?;
allocated_meta += 1;
inode.block[constants::IDX_INDIRECT] = ind;
let take = (n - consumed).min(ptrs_per_block);
let mut buf = vec![0u8; bs as usize];
for (i, &b) in data[consumed..consumed + take].iter().enumerate() {
let off = i * 4;
buf[off..off + 4].copy_from_slice(&b.to_le_bytes());
}
self.data_blocks.push((ind, buf));
consumed += take;
}
// Double-indirect block: a block of pointers to further pointer blocks.
if consumed < n {
let dind = self.alloc_data_block()?;
allocated_meta += 1;
inode.block[constants::IDX_DOUBLE_INDIRECT] = dind;
let mut dind_buf = vec![0u8; bs as usize];
let mut dind_slot = 0;
while consumed < n {
// The double-indirect block is full; triple indirection is not built.
if dind_slot >= ptrs_per_block {
return Err(crate::Error::Unsupported(
"ext: file exceeds direct+single+double indirection capacity".into(),
));
}
let ind = self.alloc_data_block()?;
allocated_meta += 1;
let off = dind_slot * 4;
dind_buf[off..off + 4].copy_from_slice(&ind.to_le_bytes());
let take = (n - consumed).min(ptrs_per_block);
let mut ind_buf = vec![0u8; bs as usize];
for (i, &b) in data[consumed..consumed + take].iter().enumerate() {
let off = i * 4;
ind_buf[off..off + 4].copy_from_slice(&b.to_le_bytes());
}
self.data_blocks.push((ind, ind_buf));
consumed += take;
dind_slot += 1;
}
// Stage the double-indirect block once all its slots are filled in.
self.data_blocks.push((dind, dind_buf));
}
Ok(allocated_meta)
}
/// Allocates `blocks` data blocks for the journal, stages the journal inode
/// (`constants::INO_JOURNAL`) and places a journal superblock in the first
/// of those blocks.
///
/// # Errors
/// Propagates block allocation and pointer-mapping failures.
fn allocate_journal(&mut self, blocks: u32, mtime: u32) -> Result<()> {
    let block_size = self.layout.block_size;
    let journal_blocks = (0..blocks)
        .map(|_| self.alloc_data_block())
        .collect::<Result<Vec<u32>>>()?;

    let mut journal_inode = Inode::regular(blocks * block_size, 0o600, 0, 0, mtime);
    let pointer_blocks = self.fill_block_pointers(&mut journal_inode, &journal_blocks)?;
    // `blocks_512` counts 512-byte sectors, including pointer blocks.
    journal_inode.blocks_512 = (blocks + pointer_blocks) * (block_size / 512);

    let journal_sb = build_jbd2_superblock(block_size, blocks);
    self.data_blocks.push((journal_blocks[0], journal_sb));
    self.inodes.push((constants::INO_JOURNAL, journal_inode));
    Ok(())
}
/// Stages the root directory: marks its inode used in group 0, allocates a
/// single data block holding the initial directory entries, and counts the
/// directory in group 0's descriptor.
fn create_root(&mut self, mtime: u32) -> Result<()> {
    let block_size = self.layout.block_size;
    let root = INO_ROOT_DIR;
    set_bit(&mut self.groups[0].inode_bitmap, root - 1);
    let first_block = self.alloc_data_block()?;

    let mut root_inode = Inode::directory(block_size, 0o755, 0, 0, mtime);
    root_inode.block[0] = first_block;
    root_inode.blocks_512 = block_size / 512;

    // The root is its own parent.
    let contents = dir::make_initial_dir_block(root, root, block_size, false);
    self.groups[0].desc.used_dirs_count += 1;
    self.inodes.push((root, root_inode));
    self.data_blocks.push((first_block, contents));
    Ok(())
}
/// Stages a 16 KiB `lost+found` directory under the root and links it in.
///
/// The first block gets the initial directory entries; the remaining blocks
/// are filled with empty directory blocks. The root's link count is bumped
/// by one for the new subdirectory.
fn create_lost_found(&mut self, dev: &mut dyn BlockDevice, mtime: u32) -> Result<()> {
    const LOST_FOUND_BYTES: u32 = 16384;
    let block_size = self.layout.block_size;
    let n_blocks: u32 = LOST_FOUND_BYTES.div_ceil(block_size);

    let ino = self.alloc_inode()?;
    let blocks = (0..n_blocks)
        .map(|_| self.alloc_data_block())
        .collect::<Result<Vec<u32>>>()?;

    let mut lf_inode = Inode::directory(LOST_FOUND_BYTES, 0o700, 0, 0, mtime);
    let pointer_blocks = self.fill_block_pointers(&mut lf_inode, &blocks)?;
    lf_inode.blocks_512 = (n_blocks + pointer_blocks) * (block_size / 512);

    for (position, &blk) in blocks.iter().enumerate() {
        let contents = if position == 0 {
            dir::make_initial_dir_block(ino, INO_ROOT_DIR, block_size, false)
        } else {
            dir::make_empty_dir_block(block_size)
        };
        self.data_blocks.push((blk, contents));
    }

    self.groups[0].desc.used_dirs_count += 1;
    self.inodes.push((ino, lf_inode));
    self.add_entry_to_dir_block_for(dev, INO_ROOT_DIR, b"lost+found", ino)?;
    self.patch_inode(dev, INO_ROOT_DIR, |root| root.links_count += 1)?;
    Ok(())
}
/// Appends a `name -> child_ino` entry to the first data block of directory
/// `dir_inode`, staging the directory's inode and that block if necessary.
///
/// The entry is always encoded with `constants::DENT_DIR` and with the
/// file-type option disabled.
///
/// # Errors
/// Returns `InvalidImage` when the directory has no first data block, and
/// propagates read and append failures.
fn add_entry_to_dir_block_for(
    &mut self,
    dev: &mut dyn BlockDevice,
    dir_inode: u32,
    name: &[u8],
    child_ino: u32,
) -> Result<()> {
    self.ensure_inode_staged(dev, dir_inode)?;
    let staged_dir = self
        .inodes
        .iter()
        .find_map(|(number, node)| (*number == dir_inode).then_some(*node))
        .unwrap();

    let first_block = self.file_block(dev, &staged_dir, 0)?;
    if first_block == 0 {
        return Err(crate::Error::InvalidImage(format!(
            "ext: dir inode {dir_inode} has no first data block"
        )));
    }

    self.ensure_block_staged(dev, first_block)?;
    let contents = self
        .data_blocks
        .iter_mut()
        .find_map(|(number, bytes)| (*number == first_block).then_some(bytes))
        .unwrap();
    append_dir_entry(contents, name, child_ino, constants::DENT_DIR, false)
}
/// Applies `f` to the staged copy of inode `ino`, loading it from the device
/// first when it is not staged yet.
fn patch_inode<F: FnOnce(&mut Inode)>(
    &mut self,
    dev: &mut dyn BlockDevice,
    ino: u32,
    f: F,
) -> Result<()> {
    self.ensure_inode_staged(dev, ino)?;
    match self.inodes.iter_mut().find(|(number, _)| *number == ino) {
        Some((_, node)) => {
            f(node);
            Ok(())
        }
        None => unreachable!("ensure_inode_staged guarantees the inode is present"),
    }
}
/// Makes sure inode `ino` has an entry in `self.inodes`, reading it from the
/// device when it is missing.
fn ensure_inode_staged(&mut self, dev: &mut dyn BlockDevice, ino: u32) -> Result<()> {
    let already_staged = self.inodes.iter().any(|(number, _)| *number == ino);
    if !already_staged {
        let loaded = self.read_inode(dev, ino)?;
        self.inodes.push((ino, loaded));
    }
    Ok(())
}
/// Makes sure block `blk` has an entry in `self.data_blocks`, reading one
/// block's worth of bytes from the device when it is missing.
fn ensure_block_staged(&mut self, dev: &mut dyn BlockDevice, blk: u32) -> Result<()> {
    let already_staged = self.data_blocks.iter().any(|(number, _)| *number == blk);
    if !already_staged {
        let mut contents = vec![0u8; self.layout.block_size as usize];
        self.read_block(dev, blk, &mut contents)?;
        self.data_blocks.push((blk, contents));
    }
    Ok(())
}
fn alloc_inode(&mut self) -> Result<u32> {
if self.next_inode > self.layout.inodes_count {
return Err(crate::Error::Unsupported(format!(
"ext: out of inodes (allocated {}, max {})",
self.next_inode - 1,
self.layout.inodes_count
)));
}
let ino = self.next_inode;
let g = ((ino - 1) / self.layout.inodes_per_group) as usize;
let idx = (ino - 1) % self.layout.inodes_per_group;
set_bit(&mut self.groups[g].inode_bitmap, idx);
self.next_inode += 1;
Ok(ino)
}
/// Allocates the first free data block, scanning groups in order and, within
/// a group, bits from the start of its data area to its last block.
///
/// Returns the absolute block number and marks it used in the group bitmap.
///
/// # Errors
/// Returns `Unsupported` when every group is full.
fn alloc_data_block(&mut self) -> Result<u32> {
    for (index, geometry) in self.layout.groups.iter().enumerate() {
        let first_data_bit = geometry.data_start - geometry.start_block;
        let blocks_in_group = geometry.end_block - geometry.start_block + 1;
        let bitmap = &mut self.groups[index].block_bitmap;
        let mut candidate = first_data_bit;
        while candidate < blocks_in_group {
            if !test_bit(bitmap, candidate) {
                set_bit(bitmap, candidate);
                return Ok(geometry.start_block + candidate);
            }
            candidate += 1;
        }
    }
    Err(crate::Error::Unsupported(
        "ext: filesystem has no free data blocks".into(),
    ))
}
/// Recomputes free block/inode counts from the bitmaps and stores them in
/// every group descriptor and, summed up, in the superblock.
fn recompute_free_counts(&mut self) {
    let inodes_per_group = self.layout.inodes_per_group;
    let mut free_blocks_total: u64 = 0;
    let mut free_inodes_total: u64 = 0;

    for (index, geometry) in self.layout.groups.iter().enumerate() {
        let state = &mut self.groups[index];
        let blocks_in_group = geometry.end_block - geometry.start_block + 1;

        // Only bits that map to real blocks/inodes are counted; padding bits
        // past the end are ignored.
        let free_blocks =
            blocks_in_group - popcount_bits(&state.block_bitmap, 0, blocks_in_group);
        let free_inodes =
            inodes_per_group - popcount_bits(&state.inode_bitmap, 0, inodes_per_group);

        state.desc.free_blocks_count = free_blocks as u16;
        state.desc.free_inodes_count = free_inodes as u16;
        free_blocks_total += free_blocks as u64;
        free_inodes_total += free_inodes as u64;
    }

    self.sb.free_blocks_count = free_blocks_total as u32;
    self.sb.free_inodes_count = free_inodes_total as u32;
}
/// Writes all in-memory metadata to `dev`: group descriptor table copies,
/// backup superblocks, every group's bitmaps, staged inodes, staged blocks
/// and finally the primary superblock, followed by a device sync.
///
/// Superblock and descriptor-table copies are only written to groups flagged
/// with `has_superblock`, but the block and inode bitmaps are written for
/// **every** group. Previously groups without a superblock copy were skipped
/// entirely, leaving their on-disk bitmaps untouched.
///
/// # Errors
/// Propagates any device write or sync failure.
fn flush_metadata(&mut self, dev: &mut dyn BlockDevice) -> Result<()> {
    let bs = self.layout.block_size as u64;
    let desc_size = self.layout.desc_size;

    // Serialise the whole descriptor table once; each copy is identical.
    let mut gdt = vec![0u8; self.layout.gdt_blocks as usize * bs as usize];
    for (i, g) in self.groups.iter().enumerate() {
        let off = i * desc_size;
        gdt[off..off + constants::GROUP_DESC_SIZE].copy_from_slice(&g.desc.encode());
    }

    for (i, g) in self.layout.groups.iter().enumerate() {
        if g.has_superblock {
            if i != 0 {
                // Backup superblock at the start of the group, tagged with
                // the group it lives in. The primary one is written last.
                let mut sb_copy = self.sb.clone();
                sb_copy.block_group_nr = i as u16;
                dev.write_at(g.start_block as u64 * bs, &sb_copy.encode())?;
            }
            let gdt_off = if i == 0 {
                // The table follows the block containing the primary
                // superblock: block 2 when `first_data_block` is 1,
                // otherwise block 1.
                if self.layout.first_data_block == 1 {
                    2 * bs
                } else {
                    bs
                }
            } else {
                (g.start_block as u64 + 1) * bs
            };
            dev.write_at(gdt_off, &gdt)?;
        }
        // Bitmaps exist in every group, with or without a superblock copy.
        dev.write_at(g.block_bitmap as u64 * bs, &self.groups[i].block_bitmap)?;
        dev.write_at(g.inode_bitmap as u64 * bs, &self.groups[i].inode_bitmap)?;
    }

    for (ino, inode) in &self.inodes {
        let (group, idx_in_group) = self.inode_location(*ino);
        let table_block = self.layout.groups[group as usize].inode_table;
        let off = table_block as u64 * bs + idx_in_group as u64 * self.layout.inode_size as u64;
        dev.write_at(off, &inode.encode())?;
    }
    for (blk, bytes) in &self.data_blocks {
        dev.write_at(*blk as u64 * bs, bytes)?;
    }

    dev.write_at(SUPERBLOCK_OFFSET, &self.sb.encode())?;
    dev.sync()?;
    Ok(())
}
/// Maps a 1-based inode number to `(group index, index within the group)`.
fn inode_location(&self, ino: u32) -> (u32, u32) {
    let per_group = self.layout.inodes_per_group;
    let zero_based = ino - 1;
    (zero_based / per_group, zero_based % per_group)
}
/// Creates a regular file named `name` in directory `parent_ino`, copying
/// its contents from `src`.
///
/// Data blocks are allocated up front and the file contents are written
/// straight to `dev`; the inode and the directory entry are staged in memory
/// until the next flush. Returns the new inode number.
///
/// # Errors
/// Returns `Unsupported` for sources larger than `u32::MAX` bytes and
/// propagates allocation, read and write failures.
pub fn add_file_to(
&mut self,
dev: &mut dyn BlockDevice,
parent_ino: u32,
name: &[u8],
src: FileSource,
meta: FileMeta,
) -> Result<u32> {
let bs = self.layout.block_size;
let len = src.len()?;
// The inode size passed to `Inode::regular` below is a `u32`.
if len > u32::MAX as u64 {
return Err(crate::Error::Unsupported(
"ext: file > 4 GiB requires LARGE_FILE (deferred to ext4)".into(),
));
}
let n_data_blocks = len.div_ceil(bs as u64) as u32;
let mut data_blocks = Vec::with_capacity(n_data_blocks as usize);
for _ in 0..n_data_blocks {
data_blocks.push(self.alloc_data_block()?);
}
let ino = self.alloc_inode()?;
// Only the permission bits of `meta.mode` are passed on.
let mut inode = Inode::regular(
len as u32,
meta.mode & 0o7777,
meta.uid,
meta.gid,
meta.mtime,
);
let allocated_meta_blocks = self.fill_block_pointers(&mut inode, &data_blocks)?;
// `blocks_512` counts 512-byte sectors, including pointer blocks.
inode.blocks_512 = (n_data_blocks + allocated_meta_blocks) * (bs / 512);
let (mut reader, _) = src.open()?;
let mut buf = vec![0u8; bs as usize];
let mut remaining = len;
for &blk in &data_blocks {
// The final block may be partial; only `to_read` bytes are written.
let to_read = remaining.min(bs as u64) as usize;
reader.read_exact(&mut buf[..to_read])?;
dev.write_at(blk as u64 * bs as u64, &buf[..to_read])?;
remaining -= to_read as u64;
}
debug_assert_eq!(remaining, 0);
self.inodes.push((ino, inode));
self.add_entry_to_dir_block_for(dev, parent_ino, name, ino)?;
Ok(ino)
}
/// Creates an empty directory named `name` inside directory `parent_ino`
/// and returns its inode number.
///
/// The new directory gets one data block holding its initial entries. The
/// parent's link count is incremented, and the directory is counted in the
/// descriptor of the block group that owns the new inode (previously it was
/// always counted in group 0, which is wrong once inodes are handed out
/// from later groups).
///
/// # Errors
/// Propagates inode/block allocation failures and directory-entry errors.
pub fn add_dir_to(
    &mut self,
    dev: &mut dyn BlockDevice,
    parent_ino: u32,
    name: &[u8],
    meta: FileMeta,
) -> Result<u32> {
    let bs = self.layout.block_size;
    let ino = self.alloc_inode()?;
    let blk = self.alloc_data_block()?;

    // Only the permission bits of `meta.mode` are passed on.
    let mut inode = Inode::directory(bs, meta.mode & 0o7777, meta.uid, meta.gid, meta.mtime);
    if matches!(self.kind, FsKind::Ext4) {
        self.fill_block_pointers_extent(&mut inode, &[blk])?;
    } else {
        inode.block[0] = blk;
    }
    inode.blocks_512 = bs / 512;

    let block_bytes = dir::make_initial_dir_block(ino, parent_ino, bs, false);
    self.data_blocks.push((blk, block_bytes));
    self.inodes.push((ino, inode));

    // Account for the directory in the group its inode actually lives in.
    let (group, _) = self.inode_location(ino);
    self.groups[group as usize].desc.used_dirs_count += 1;

    self.add_entry_to_dir_block_for(dev, parent_ino, name, ino)?;
    self.patch_inode(dev, parent_ino, |i| i.links_count += 1)?;
    Ok(ino)
}
pub fn add_symlink_to(
&mut self,
dev: &mut dyn BlockDevice,
parent_ino: u32,
name: &[u8],
target: &[u8],
meta: FileMeta,
) -> Result<u32> {
if target.len() > 4095 {
return Err(crate::Error::Unsupported(
"ext: symlink target > 4095 bytes".into(),
));
}
let bs = self.layout.block_size;
let ino = self.alloc_inode()?;
let mut inode = Inode::symlink(
target.len() as u32,
meta.mode & 0o7777,
meta.uid,
meta.gid,
meta.mtime,
);
const FAST_MAX: usize = 60;
if target.len() <= FAST_MAX {
let mut packed = [0u8; FAST_MAX];
packed[..target.len()].copy_from_slice(target);
for (i, slot) in inode.block.iter_mut().enumerate() {
let off = i * 4;
*slot = u32::from_le_bytes(packed[off..off + 4].try_into().unwrap());
}
} else {
let blk = self.alloc_data_block()?;
inode.block[0] = blk;
inode.blocks_512 = bs / 512;
let mut buf = vec![0u8; bs as usize];
buf[..target.len()].copy_from_slice(target);
dev.write_at(blk as u64 * bs as u64, &buf)?;
}
self.inodes.push((ino, inode));
self.add_entry_to_dir_block_for(dev, parent_ino, name, ino)?;
Ok(ino)
}
/// Creates a special file (character/block device, FIFO or socket) named
/// `name` in directory `parent_ino` and returns its inode number.
///
/// No data blocks are allocated; the inode and directory entry are staged
/// until the next flush.
#[allow(clippy::too_many_arguments)]
pub fn add_device_to(
    &mut self,
    dev: &mut dyn BlockDevice,
    parent_ino: u32,
    name: &[u8],
    kind: DeviceKind,
    major: u32,
    minor: u32,
    meta: FileMeta,
) -> Result<u32> {
    let ino = self.alloc_inode()?;
    let node = Inode::special(
        match kind {
            DeviceKind::Char => SpecialKind::Char,
            DeviceKind::Block => SpecialKind::Block,
            DeviceKind::Fifo => SpecialKind::Fifo,
            DeviceKind::Socket => SpecialKind::Socket,
        },
        major,
        minor,
        meta.mode & 0o7777,
        meta.uid,
        meta.gid,
        meta.mtime,
    );
    self.inodes.push((ino, node));
    self.add_entry_to_dir_block_for(dev, parent_ino, name, ino)?;
    Ok(ino)
}
/// Recomputes the free block/inode counters and writes all staged metadata
/// to `dev`.
pub fn flush(&mut self, dev: &mut dyn BlockDevice) -> Result<()> {
    self.recompute_free_counts();
    self.flush_metadata(dev)?;
    Ok(())
}
/// Recursively copies the contents of host directory `src` into the
/// directory `parent_ino` of this filesystem.
///
/// Directories, regular files, symlinks, device nodes, FIFOs and sockets are
/// recreated; other entry types are silently skipped. Only the permission
/// bits are taken from the host; uid, gid and all timestamps are set to 0.
///
/// Symlink targets are copied byte-for-byte (like entry names), so targets
/// that are not valid UTF-8 are preserved instead of being replaced with
/// U+FFFD by a lossy string conversion.
///
/// # Errors
/// Propagates host I/O errors and any failure from the `add_*_to` methods.
pub fn populate_from_host_dir(
    &mut self,
    dev: &mut dyn BlockDevice,
    parent_ino: u32,
    src: &std::path::Path,
) -> Result<()> {
    use std::os::unix::fs::{FileTypeExt, PermissionsExt};
    for entry in std::fs::read_dir(src)? {
        let entry = entry?;
        let meta = entry.metadata()?;
        let ft = meta.file_type();
        let name = entry.file_name();
        let name_bytes = name.as_encoded_bytes();
        let mode = (meta.permissions().mode() & 0o7777) as u16;
        let fmeta = FileMeta {
            mode,
            uid: 0,
            gid: 0,
            mtime: 0,
            atime: 0,
            ctime: 0,
        };
        if ft.is_dir() {
            let child = self.add_dir_to(dev, parent_ino, name_bytes, fmeta)?;
            self.populate_from_host_dir(dev, child, &entry.path())?;
        } else if ft.is_file() {
            let src_path = entry.path();
            self.add_file_to(
                dev,
                parent_ino,
                name_bytes,
                FileSource::HostPath(src_path),
                fmeta,
            )?;
        } else if ft.is_symlink() {
            let target = std::fs::read_link(entry.path())?;
            // Raw bytes, not `to_string_lossy`: the target must round-trip.
            let target_bytes = target.as_os_str().as_encoded_bytes();
            self.add_symlink_to(dev, parent_ino, name_bytes, target_bytes, fmeta)?;
        } else if ft.is_block_device() || ft.is_char_device() {
            use std::os::unix::fs::MetadataExt;
            // Split the host `rdev` value into its major and minor parts.
            let rdev = meta.rdev();
            let major = ((rdev >> 8) & 0xfff) | ((rdev >> 32) & !0xfff);
            let minor = (rdev & 0xff) | ((rdev >> 12) & !0xff);
            let kind = if ft.is_char_device() {
                DeviceKind::Char
            } else {
                DeviceKind::Block
            };
            self.add_device_to(
                dev,
                parent_ino,
                name_bytes,
                kind,
                major as u32,
                minor as u32,
                fmeta,
            )?;
        } else if ft.is_fifo() {
            self.add_device_to(dev, parent_ino, name_bytes, DeviceKind::Fifo, 0, 0, fmeta)?;
        } else if ft.is_socket() {
            self.add_device_to(dev, parent_ino, name_bytes, DeviceKind::Socket, 0, 0, fmeta)?;
        }
    }
    Ok(())
}
/// Builds a complete filesystem on `dev` from the host directory `src`.
///
/// A `BuildPlan` scans `src` to derive the format options; the device is
/// then formatted, populated from `src` and flushed.
pub fn build_from_host_dir(
    dev: &mut dyn BlockDevice,
    src: &std::path::Path,
    kind: FsKind,
    block_size: u32,
) -> Result<Self> {
    let mut sizing = BuildPlan::new(block_size, kind);
    sizing.scan_host_path(src)?;
    let format_opts = sizing.to_format_opts();

    let mut fs = Self::format_with(dev, &format_opts)?;
    fs.populate_from_host_dir(dev, INO_ROOT_DIR, src)?;
    fs.flush(dev)?;
    Ok(fs)
}
/// Creates a `/dev` directory populated with the device nodes listed by
/// `device_table(kind)`.
///
/// Returns `Ok(None)` without touching the filesystem when `kind` is
/// `RootDevs::None` or the table is empty; otherwise returns the inode
/// number of the new `dev` directory. Every created entry is owned by
/// `owner_uid`/`owner_gid` and stamped with `mtime`.
pub fn populate_rootdevs(
    &mut self,
    dev: &mut dyn BlockDevice,
    kind: RootDevs,
    owner_uid: u32,
    owner_gid: u32,
    mtime: u32,
) -> Result<Option<u32>> {
    if kind == RootDevs::None {
        return Ok(None);
    }
    let table = device_table(kind);
    if table.is_empty() {
        return Ok(None);
    }

    // All entries share ownership and timestamps; only the mode differs.
    let meta_with_mode = |mode| FileMeta {
        mode,
        uid: owner_uid,
        gid: owner_gid,
        mtime,
        atime: mtime,
        ctime: mtime,
    };

    let dev_dir = self.add_dir_to(dev, INO_ROOT_DIR, b"dev", meta_with_mode(0o755))?;
    for node in table {
        self.add_device_to(
            dev,
            dev_dir,
            node.name.as_bytes(),
            node.kind,
            node.major,
            node.minor,
            meta_with_mode(node.mode),
        )?;
    }
    Ok(Some(dev_dir))
}
/// Opens an existing filesystem on `dev`.
///
/// Reads and decodes the superblock, derives the layout from it, loads the
/// primary group descriptor table and every group's bitmaps, and detects the
/// filesystem flavour from the feature flags. Nothing is staged.
///
/// # Errors
/// Propagates device read errors and superblock/layout decoding failures.
pub fn open(dev: &mut dyn BlockDevice) -> Result<Self> {
let mut sb_buf = [0u8; constants::SUPERBLOCK_SIZE];
dev.read_at(constants::SUPERBLOCK_OFFSET, &mut sb_buf)?;
let sb = Superblock::decode(&sb_buf)?;
let mut layout = layout::from_superblock(&sb)?;
let bs = layout.block_size as u64;
// The primary descriptor table starts at block 2 when `first_data_block`
// is 1, otherwise at block 1.
let gdt_off = if layout.first_data_block == 1 {
2 * bs
} else {
bs
};
let mut gdt = vec![0u8; layout.gdt_blocks as usize * bs as usize];
dev.read_at(gdt_off, &mut gdt)?;
let desc_size = layout.desc_size;
let mut groups = Vec::with_capacity(layout.groups.len());
for i in 0..layout.groups.len() {
let off = i * desc_size;
let desc = GroupDesc::decode(&gdt[off..off + constants::GROUP_DESC_SIZE]);
// The on-disk descriptor overrides the positions computed by the layout.
layout.groups[i].block_bitmap = desc.block_bitmap;
layout.groups[i].inode_bitmap = desc.inode_bitmap;
layout.groups[i].inode_table = desc.inode_table;
let mut block_bitmap = vec![0u8; bs as usize];
dev.read_at(desc.block_bitmap as u64 * bs, &mut block_bitmap)?;
let mut inode_bitmap = vec![0u8; bs as usize];
dev.read_at(desc.inode_bitmap as u64 * bs, &mut inode_bitmap)?;
groups.push(GroupState {
block_bitmap,
inode_bitmap,
desc,
});
}
// Seed the allocator with the first free inode at or after `first_ino`.
// NOTE(review): only group 0's bitmap is scanned here; inodes in later
// groups are not examined by this loop.
let mut next_inode = sb.first_ino;
while next_inode <= layout.inodes_per_group
&& test_bit(&groups[0].inode_bitmap, next_inode - 1)
{
next_inode += 1;
}
// Extents flag => ext4; otherwise a journal flag => ext3; else ext2.
let kind = if sb.feature_incompat & constants::feature::INCOMPAT_EXTENTS != 0 {
FsKind::Ext4
} else if sb.feature_compat & constants::feature::COMPAT_HAS_JOURNAL != 0 {
FsKind::Ext3
} else {
FsKind::Ext2
};
Ok(Self {
sb,
layout,
kind,
groups,
next_inode,
inodes: Vec::new(),
data_blocks: Vec::new(),
})
}
/// Returns inode `ino`, preferring a staged copy over the on-disk one.
///
/// # Errors
/// Returns `InvalidArgument` when `ino` is 0 or larger than the inode
/// count, and propagates device read errors.
pub fn read_inode(&self, dev: &mut dyn BlockDevice, ino: u32) -> Result<Inode> {
    let in_range = ino != 0 && ino <= self.layout.inodes_count;
    if !in_range {
        return Err(crate::Error::InvalidArgument(format!(
            "ext: inode {ino} out of range"
        )));
    }
    if let Some((_, staged)) = self.inodes.iter().find(|(number, _)| *number == ino) {
        return Ok(*staged);
    }

    // Locate the inode's slot inside its group's inode table.
    let (group, slot) = self.inode_location(ino);
    let block_size = self.layout.block_size as u64;
    let table_start = self.layout.groups[group as usize].inode_table as u64 * block_size;
    let byte_offset = table_start + slot as u64 * self.layout.inode_size as u64;

    let mut raw = [0u8; inode::INODE_BASE_SIZE];
    dev.read_at(byte_offset, &mut raw)?;
    Ok(Inode::decode(&raw))
}
/// Fills `out` with the contents of block `blk`, taken from the staged copy
/// when one exists and from the device otherwise.
fn read_block(&self, dev: &mut dyn BlockDevice, blk: u32, out: &mut [u8]) -> Result<()> {
    match self.data_blocks.iter().find(|(number, _)| *number == blk) {
        Some((_, staged)) => out.copy_from_slice(staged),
        None => {
            let block_size = self.layout.block_size as u64;
            dev.read_at(blk as u64 * block_size, out)?;
        }
    }
    Ok(())
}
pub fn file_block(&self, dev: &mut dyn BlockDevice, ino: &Inode, n: u32) -> Result<u32> {
if ino.flags & constants::EXT4_EXTENTS_FL != 0 {
return self.file_block_extent(ino, n);
}
if (n as usize) < constants::N_DIRECT {
return Ok(ino.block[n as usize]);
}
let ptrs_per_block = self.layout.block_size / 4;
let n_off = n - constants::N_DIRECT as u32;
if n_off < ptrs_per_block {
let ind = ino.block[constants::IDX_INDIRECT];
if ind == 0 {
return Err(crate::Error::InvalidImage(
"ext: indirect block index unset".into(),
));
}
let mut buf = vec![0u8; self.layout.block_size as usize];
self.read_block(dev, ind, &mut buf)?;
let off = (n_off as usize) * 4;
return Ok(u32::from_le_bytes(buf[off..off + 4].try_into().unwrap()));
}
Err(crate::Error::Unsupported(
"ext: double/triple indirection not yet supported in reader".into(),
))
}
fn file_block_extent(&self, ino: &Inode, n: u32) -> Result<u32> {
let mut buf = [0u8; 60];
for (i, slot) in ino.block.iter().enumerate() {
let off = i * 4;
buf[off..off + 4].copy_from_slice(&slot.to_le_bytes());
}
let magic = u16::from_le_bytes(buf[0..2].try_into().unwrap());
if magic != extent::EXT4_EXT_MAGIC {
return Err(crate::Error::InvalidImage(format!(
"ext4: extent header magic {magic:#06x} != {:#06x}",
extent::EXT4_EXT_MAGIC
)));
}
let entries = u16::from_le_bytes(buf[2..4].try_into().unwrap()) as usize;
let depth = u16::from_le_bytes(buf[6..8].try_into().unwrap());
if depth != 0 {
return Err(crate::Error::Unsupported(
"ext4: multi-level extent trees not yet supported in reader".into(),
));
}
for i in 0..entries {
let off = 12 + i * 12;
let ee_block = u32::from_le_bytes(buf[off..off + 4].try_into().unwrap());
let ee_len = u16::from_le_bytes(buf[off + 4..off + 6].try_into().unwrap());
let ee_start_hi = u16::from_le_bytes(buf[off + 6..off + 8].try_into().unwrap()) as u64;
let ee_start_lo = u32::from_le_bytes(buf[off + 8..off + 12].try_into().unwrap()) as u64;
let len = if ee_len > extent::MAX_LEN_PER_EXTENT {
ee_len - extent::MAX_LEN_PER_EXTENT
} else {
ee_len
};
if n >= ee_block && n < ee_block + len as u32 {
let phys = (ee_start_hi << 32) | ee_start_lo;
return Ok((phys + (n - ee_block) as u64) as u32);
}
}
Ok(0)
}
/// Lists the entries of the directory with inode number `ino`.
///
/// Every data block covered by the directory's size is walked; entries with
/// inode 0 or an empty name are skipped. Each entry's kind is determined by
/// reading the child's inode.
///
/// # Errors
/// Returns `InvalidArgument` when `ino` is not a directory and propagates
/// inode/block read failures.
pub fn list_inode(
&self,
dev: &mut dyn BlockDevice,
ino: u32,
) -> Result<Vec<crate::fs::DirEntry>> {
let inode = self.read_inode(dev, ino)?;
if inode.mode & constants::S_IFMT != constants::S_IFDIR {
return Err(crate::Error::InvalidArgument(format!(
"ext: inode {ino} is not a directory"
)));
}
let bs = self.layout.block_size;
let n_blocks = inode.size.div_ceil(bs);
let mut out = Vec::new();
let with_filetype = self.sb.feature_incompat & constants::feature::INCOMPAT_FILETYPE != 0;
let mut block_buf = vec![0u8; bs as usize];
for n in 0..n_blocks {
let blk = self.file_block(dev, &inode, n)?;
// Block number 0 means "not mapped"; nothing to read for this index.
if blk == 0 {
continue;
}
self.read_block(dev, blk, &mut block_buf)?;
let mut off = 0usize;
while off < block_buf.len() {
// Stop walking this block as soon as an entry fails to decode.
let Some(entry) = dir::decode_entry(&block_buf[off..], with_filetype) else {
break;
};
if entry.inode != 0 && !entry.name.is_empty() {
let child = self.read_inode(dev, entry.inode)?;
out.push(crate::fs::DirEntry {
name: String::from_utf8_lossy(entry.name).into_owned(),
inode: entry.inode,
kind: kind_from_mode(child.mode),
});
}
off += entry.rec_len;
// A zero record length would never advance; bail out of this block.
if entry.rec_len == 0 {
break;
}
}
}
Ok(out)
}
/// Resolves an absolute `path` to an inode number by walking the directory
/// tree from the root. Empty components (repeated or trailing `/`) are
/// ignored, so `"/"` resolves to the root itself.
///
/// # Errors
/// Returns `InvalidArgument` when `path` is not absolute or a component
/// does not exist, and propagates directory listing failures.
pub fn path_to_inode(&self, dev: &mut dyn BlockDevice, path: &str) -> Result<u32> {
    if !path.starts_with('/') {
        return Err(crate::Error::InvalidArgument(format!(
            "ext: path must be absolute, got {path:?}"
        )));
    }

    let mut current = constants::INO_ROOT_DIR;
    for comp in path.split('/') {
        if comp.is_empty() {
            continue;
        }
        let listing = self.list_inode(dev, current)?;
        let Some(hit) = listing.iter().find(|e| e.name == comp) else {
            return Err(crate::Error::InvalidArgument(format!(
                "ext: no such entry {comp:?} in path"
            )));
        };
        current = hit.inode;
    }
    Ok(current)
}
/// Opens regular file `ino` for sequential reading.
///
/// # Errors
/// Returns `InvalidArgument` when `ino` is not a regular file and
/// propagates inode read failures.
pub fn open_file_reader<'a>(
    &'a self,
    dev: &'a mut dyn BlockDevice,
    ino: u32,
) -> Result<FileReader<'a>> {
    let inode = self.read_inode(dev, ino)?;
    let is_regular = inode.mode & constants::S_IFMT == constants::S_IFREG;
    if !is_regular {
        return Err(crate::Error::InvalidArgument(format!(
            "ext: inode {ino} is not a regular file"
        )));
    }

    let block_buf = vec![0u8; self.layout.block_size as usize];
    Ok(FileReader {
        ext: self,
        dev,
        inode,
        pos: 0,
        block_buf,
        // No block is cached yet.
        cached_block: u32::MAX,
    })
}
}
/// Sequential reader over the contents of one regular file; created by
/// `Ext::open_file_reader`.
pub struct FileReader<'a> {
// Filesystem used to map logical file blocks to device blocks.
ext: &'a Ext,
// Device the file data is read from.
dev: &'a mut dyn BlockDevice,
// Copy of the file's inode taken when the reader was opened.
inode: Inode,
// Current read position in bytes from the start of the file.
pos: u64,
// Buffer holding the most recently loaded file block.
block_buf: Vec<u8>,
// Logical index of the block held in `block_buf`; starts as `u32::MAX`,
// which the opener uses to mean "nothing loaded yet".
cached_block: u32,
}
impl<'a> Read for FileReader<'a> {
    /// Reads up to `out.len()` bytes at the current position.
    ///
    /// A single call never crosses a block boundary, so short reads are
    /// normal. Unmapped blocks (block number 0) read back as zeroes. Returns
    /// `Ok(0)` once the end of the file is reached.
    fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
        let file_len = self.inode.size as u64;
        if self.pos >= file_len {
            return Ok(0);
        }

        let block_size = self.ext.layout.block_size as u64;
        let wanted_block = (self.pos / block_size) as u32;
        let offset_in_block = (self.pos % block_size) as usize;

        // Load the block only when it is not the one already buffered.
        if wanted_block != self.cached_block {
            let physical = self
                .ext
                .file_block(self.dev, &self.inode, wanted_block)
                .map_err(std::io::Error::other)?;
            match physical {
                0 => self.block_buf.fill(0),
                _ => {
                    self.dev
                        .read_at(physical as u64 * block_size, &mut self.block_buf)
                        .map_err(std::io::Error::other)?;
                }
            }
            self.cached_block = wanted_block;
        }

        let left_in_block = block_size as usize - offset_in_block;
        let left_in_file = (file_len - self.pos) as usize;
        let count = out.len().min(left_in_block).min(left_in_file);
        let chunk = &self.block_buf[offset_in_block..offset_in_block + count];
        out[..count].copy_from_slice(chunk);
        self.pos += count as u64;
        Ok(count)
    }
}
/// Builds a one-block journal superblock image for a journal of
/// `journal_blocks` blocks of `block_size` bytes.
///
/// All fields are written big-endian; every byte not listed below is zero.
/// Field names in the table follow the jbd2 journal superblock layout.
fn build_jbd2_superblock(block_size: u32, journal_blocks: u32) -> Vec<u8> {
    // (byte offset, 32-bit value) pairs, in on-disk order.
    let fields: [(usize, u32); 7] = [
        (0, 0xC03B_3998),    // magic number
        (4, 4),              // block type
        (12, block_size),    // journal block size
        (16, journal_blocks), // total blocks in the journal
        (20, 1),             // first block of log information
        (24, 1),             // first expected commit sequence
        (64, 1),             // number of users
    ];

    let mut image = vec![0u8; block_size as usize];
    for &(offset, value) in fields.iter() {
        image[offset..offset + 4].copy_from_slice(&value.to_be_bytes());
    }
    image
}
fn split_path(path: &std::path::Path) -> Result<(std::path::PathBuf, String)> {
let s = path
.to_str()
.ok_or_else(|| crate::Error::InvalidArgument(format!("ext: non-UTF-8 path {path:?}")))?;
if !s.starts_with('/') {
return Err(crate::Error::InvalidArgument(format!(
"ext: path must be absolute, got {s:?}"
)));
}
if s == "/" {
return Err(crate::Error::InvalidArgument(
"ext: cannot create or remove the root".into(),
));
}
let trimmed = s.trim_end_matches('/');
let (parent, name) = match trimmed.rsplit_once('/') {
Some((p, n)) => (if p.is_empty() { "/" } else { p }, n),
None => {
return Err(crate::Error::InvalidArgument(format!(
"ext: bad path {s:?}"
)));
}
};
Ok((std::path::PathBuf::from(parent), name.to_string()))
}
/// Path-based `Filesystem` front end for [`Ext`].
///
/// Every `create_*` method resolves the parent directory through the same
/// helper, so a bad parent path is always reported as an error rather than
/// through an `unwrap()` in some of the methods.
impl crate::fs::Filesystem for Ext {
    type FormatOpts = FormatOpts;

    /// Formats `dev`; see `Ext::format_with`.
    fn format(dev: &mut dyn BlockDevice, opts: &Self::FormatOpts) -> Result<Self> {
        Self::format_with(dev, opts)
    }

    /// Opens an existing filesystem; forwards to the inherent `Ext::open`.
    fn open(dev: &mut dyn BlockDevice) -> Result<Self> {
        Self::open(dev)
    }

    /// Creates a regular file at the absolute `path` with contents from `src`.
    fn create_file(
        &mut self,
        dev: &mut dyn BlockDevice,
        path: &std::path::Path,
        src: FileSource,
        meta: FileMeta,
    ) -> Result<()> {
        let (parent_ino, name) = resolve_parent(self, dev, path)?;
        self.add_file_to(dev, parent_ino, name.as_bytes(), src, meta)?;
        Ok(())
    }

    /// Creates an empty directory at the absolute `path`.
    fn create_dir(
        &mut self,
        dev: &mut dyn BlockDevice,
        path: &std::path::Path,
        meta: FileMeta,
    ) -> Result<()> {
        let (parent_ino, name) = resolve_parent(self, dev, path)?;
        self.add_dir_to(dev, parent_ino, name.as_bytes(), meta)?;
        Ok(())
    }

    /// Creates a symlink at the absolute `path` pointing at `target`.
    ///
    /// Fails with `InvalidArgument` when `target` is not valid UTF-8.
    fn create_symlink(
        &mut self,
        dev: &mut dyn BlockDevice,
        path: &std::path::Path,
        target: &std::path::Path,
        meta: FileMeta,
    ) -> Result<()> {
        let (parent_ino, name) = resolve_parent(self, dev, path)?;
        let target_bytes = target
            .to_str()
            .ok_or_else(|| crate::Error::InvalidArgument("ext: non-UTF-8 symlink target".into()))?
            .as_bytes();
        self.add_symlink_to(dev, parent_ino, name.as_bytes(), target_bytes, meta)?;
        Ok(())
    }

    /// Creates a device node, FIFO or socket at the absolute `path`.
    fn create_device(
        &mut self,
        dev: &mut dyn BlockDevice,
        path: &std::path::Path,
        kind: DeviceKind,
        major: u32,
        minor: u32,
        meta: FileMeta,
    ) -> Result<()> {
        let (parent_ino, name) = resolve_parent(self, dev, path)?;
        self.add_device_to(dev, parent_ino, name.as_bytes(), kind, major, minor, meta)?;
        Ok(())
    }

    /// Removal is not implemented; always returns `Unsupported`.
    fn remove(&mut self, _dev: &mut dyn BlockDevice, _path: &std::path::Path) -> Result<()> {
        Err(crate::Error::Unsupported(
            "ext: remove() not yet implemented".into(),
        ))
    }

    /// Lists the directory at the absolute `path`.
    fn list(
        &mut self,
        dev: &mut dyn BlockDevice,
        path: &std::path::Path,
    ) -> Result<Vec<crate::fs::DirEntry>> {
        let s = path
            .to_str()
            .ok_or_else(|| crate::Error::InvalidArgument("ext: non-UTF-8 path".into()))?;
        let ino = self.path_to_inode(dev, s)?;
        self.list_inode(dev, ino)
    }

    /// Opens the regular file at the absolute `path` for reading.
    fn read_file<'a>(
        &'a mut self,
        dev: &'a mut dyn BlockDevice,
        path: &std::path::Path,
    ) -> Result<Box<dyn Read + 'a>> {
        let s = path
            .to_str()
            .ok_or_else(|| crate::Error::InvalidArgument("ext: non-UTF-8 path".into()))?;
        let ino = self.path_to_inode(dev, s)?;
        let reader = self.open_file_reader(dev, ino)?;
        Ok(Box::new(reader))
    }

    /// Writes staged metadata; forwards to the inherent `Ext::flush`.
    fn flush(&mut self, dev: &mut dyn BlockDevice) -> Result<()> {
        Self::flush(self, dev)
    }
}

/// Splits `path` into parent and final component and resolves the parent
/// directory to its inode number. Returns `(parent inode, entry name)`.
fn resolve_parent(
    ext: &Ext,
    dev: &mut dyn BlockDevice,
    path: &std::path::Path,
) -> Result<(u32, String)> {
    let (parent, name) = split_path(path)?;
    let parent_str = parent
        .to_str()
        .ok_or_else(|| crate::Error::InvalidArgument("ext: non-UTF-8 parent path".into()))?;
    let parent_ino = ext.path_to_inode(dev, parent_str)?;
    Ok((parent_ino, name))
}
/// Maps the file-type bits of an inode `mode` to a directory-listing
/// `EntryKind`; unrecognised type bits map to `EntryKind::Unknown`.
fn kind_from_mode(mode: u16) -> crate::fs::EntryKind {
    use crate::fs::EntryKind;
    let type_bits = mode & constants::S_IFMT;
    if type_bits == constants::S_IFREG {
        EntryKind::Regular
    } else if type_bits == constants::S_IFDIR {
        EntryKind::Dir
    } else if type_bits == constants::S_IFLNK {
        EntryKind::Symlink
    } else if type_bits == constants::S_IFCHR {
        EntryKind::Char
    } else if type_bits == constants::S_IFBLK {
        EntryKind::Block
    } else if type_bits == constants::S_IFIFO {
        EntryKind::Fifo
    } else if type_bits == constants::S_IFSOCK {
        EntryKind::Socket
    } else {
        EntryKind::Unknown
    }
}
fn append_dir_entry(
block: &mut [u8],
name: &[u8],
inode: u32,
file_type: u8,
with_filetype: bool,
) -> Result<()> {
let needed = dir::min_rec_len(name.len());
let mut off = 0usize;
let last_off: usize;
loop {
let entry = dir::decode_entry(&block[off..], with_filetype).ok_or_else(|| {
crate::Error::InvalidImage("corrupt dir entry while appending".into())
})?;
let next = off + entry.rec_len;
if next >= block.len() {
last_off = off;
break;
}
off = next;
}
let last_entry = dir::decode_entry(&block[last_off..], with_filetype).expect("decode last");
let last_min = dir::min_rec_len(last_entry.name.len());
let last_real_end = last_off + last_entry.rec_len;
let new_entry_off = last_off + last_min;
let new_entry_space = last_real_end - new_entry_off;
if new_entry_space < needed {
return Err(crate::Error::Unsupported(
"ext: dir block full — multi-block directories not yet implemented".into(),
));
}
block[last_off + 4..last_off + 6].copy_from_slice(&(last_min as u16).to_le_bytes());
let mut tail = Vec::with_capacity(new_entry_space);
dir::encode_entry(
&mut tail,
inode,
name,
new_entry_space as u16,
file_type,
with_filetype,
);
debug_assert_eq!(tail.len(), new_entry_space);
block[new_entry_off..new_entry_off + new_entry_space].copy_from_slice(&tail);
Ok(())
}
/// Counts the set bits of bitmap `bm` at positions `start..end` (end
/// exclusive), as reported by `test_bit`.
fn popcount_bits(bm: &[u8], start: u32, end: u32) -> u32 {
    let mut set = 0u32;
    let mut position = start;
    while position < end {
        if test_bit(bm, position) {
            set += 1;
        }
        position += 1;
    }
    set
}
// Unit tests for the formatter, run against an in-memory block device.
#[cfg(test)]
mod tests {
use super::*;
use crate::block::MemoryBackend;
// Formatting a 1 MiB device with default options must yield a superblock
// that reflects those defaults (1024 blocks of 1024 bytes, 16 inodes).
#[test]
fn format_creates_clean_filesystem() {
let mut dev = MemoryBackend::new(1024 * 1024);
let opts = FormatOpts::default();
let ext = Ext::format_with(&mut dev, &opts).expect("format");
assert_eq!(ext.sb.magic, constants::EXT2_MAGIC);
assert_eq!(ext.sb.blocks_count, 1024);
assert_eq!(ext.sb.inodes_count, 16);
assert_eq!(ext.sb.block_size(), 1024);
}
}