use std::{
cell::RefCell,
collections::{BTreeMap, HashMap},
mem::size_of,
os::unix::ffi::OsStrExt,
rc::Rc,
};
use log::debug;
use xxhash_rust::xxh32::xxh32;
use zerocopy::{Immutable, IntoBytes};
use crate::{
erofs::{composefs::OverlayMetacopy, format, reader::round_up},
fsverity::FsVerityHashValue,
tree,
};
#[derive(Clone, Copy, Debug)]
enum Offset {
Header,
Superblock,
Inode,
XAttr,
Block,
End,
}
trait Output {
fn note_offset(&mut self, offset_type: Offset);
fn get(&self, offset_type: Offset, idx: usize) -> usize;
fn write(&mut self, data: &[u8]);
fn pad(&mut self, alignment: usize);
fn len(&self) -> usize;
fn get_div(&self, offset_type: Offset, idx: usize, div: usize) -> usize {
let offset = self.get(offset_type, idx);
assert_eq!(offset % div, 0);
offset / div
}
fn get_nid(&self, idx: usize) -> u64 {
self.get_div(Offset::Inode, idx, 32) as u64
}
fn get_xattr(&self, idx: usize) -> u32 {
self.get_div(Offset::XAttr, idx, 4).try_into().unwrap()
}
fn write_struct(&mut self, st: impl IntoBytes + Immutable) {
self.write(st.as_bytes());
}
}
#[derive(PartialOrd, PartialEq, Eq, Ord, Clone)]
struct XAttr {
prefix: u8,
suffix: Box<[u8]>,
value: Box<[u8]>,
}
#[derive(Clone, Default)]
struct InodeXAttrs {
shared: Vec<usize>,
local: Vec<XAttr>,
filter: u32,
}
#[derive(Debug)]
struct DirEnt<'a> {
name: &'a [u8],
inode: usize,
file_type: format::FileType,
}
#[derive(Debug, Default)]
struct Directory<'a> {
blocks: Box<[Box<[DirEnt<'a>]>]>,
inline: Box<[DirEnt<'a>]>,
size: usize,
nlink: usize,
}
#[derive(Debug)]
struct Leaf<'a, ObjectID: FsVerityHashValue> {
content: &'a tree::LeafContent<ObjectID>,
nlink: usize,
}
#[derive(Debug)]
enum InodeContent<'a, ObjectID: FsVerityHashValue> {
Directory(Directory<'a>),
Leaf(Leaf<'a, ObjectID>),
}
struct Inode<'a, ObjectID: FsVerityHashValue> {
stat: &'a tree::Stat,
xattrs: InodeXAttrs,
content: InodeContent<'a, ObjectID>,
}
impl XAttr {
pub fn write(&self, output: &mut impl Output) {
output.write_struct(format::XAttrHeader {
name_len: self.suffix.len() as u8,
name_index: self.prefix,
value_size: (self.value.len() as u16).into(),
});
output.write(&self.suffix);
output.write(&self.value);
output.pad(4);
}
}
impl InodeXAttrs {
fn add(&mut self, name: &[u8], value: &[u8]) {
for (idx, prefix) in format::XATTR_PREFIXES.iter().enumerate().rev() {
if let Some(suffix) = name.strip_prefix(*prefix) {
self.filter |= 1 << (xxh32(suffix, format::XATTR_FILTER_SEED + idx as u32) % 32);
self.local.push(XAttr {
prefix: idx as u8,
suffix: Box::from(suffix),
value: Box::from(value),
});
return;
}
}
unreachable!("{:?}", std::str::from_utf8(name)); }
fn write(&self, output: &mut impl Output) {
if self.filter != 0 {
debug!(" write xattrs block");
output.write_struct(format::InodeXAttrHeader {
name_filter: (!self.filter).into(),
shared_count: self.shared.len() as u8,
..Default::default()
});
for idx in &self.shared {
debug!(" shared {} @{}", idx, output.len());
output.write(&output.get_xattr(*idx).to_le_bytes());
}
for attr in &self.local {
debug!(" local @{}", output.len());
attr.write(output);
}
}
}
}
impl<'a> Directory<'a> {
pub fn from_entries(entries: Vec<DirEnt<'a>>) -> Self {
let mut blocks = vec![];
let mut rest = vec![];
let mut n_bytes = 0;
let mut nlink = 0;
debug!("Directory with {} items", entries.len());
for entry in entries.into_iter() {
let entry_size = size_of::<format::DirectoryEntryHeader>() + entry.name.len();
assert!(entry_size <= 4096);
debug!(" {:?}", entry.file_type);
if matches!(entry.file_type, format::FileType::Directory) {
nlink += 1;
}
n_bytes += entry_size;
if n_bytes <= 4096 {
rest.push(entry);
} else {
debug!(" block {}", rest.len());
blocks.push(rest.into_boxed_slice());
rest = vec![entry];
n_bytes = entry_size;
}
}
if n_bytes > 2048 {
blocks.push(rest.into_boxed_slice());
rest = vec![];
n_bytes = 0;
}
debug!(
" blocks {} inline {} inline_size {n_bytes}",
blocks.len(),
rest.len()
);
let size = format::BLOCK_SIZE * blocks.len() + n_bytes;
Self {
blocks: blocks.into_boxed_slice(),
inline: rest.into_boxed_slice(),
size,
nlink,
}
}
fn write_block(&self, output: &mut impl Output, block: &[DirEnt]) {
debug!(" write dir block {} @{}", block.len(), output.len());
let mut nameofs = size_of::<format::DirectoryEntryHeader>() * block.len();
for entry in block {
debug!(
" entry {:?} name {} @{}",
entry.file_type,
nameofs,
output.len()
);
output.write_struct(format::DirectoryEntryHeader {
name_offset: (nameofs as u16).into(),
inode_offset: output.get_nid(entry.inode).into(),
file_type: entry.file_type.into(),
..Default::default()
});
nameofs += entry.name.len();
}
for entry in block {
debug!(" name @{}", output.len());
output.write(entry.name.as_bytes());
}
}
fn write_inline(&self, output: &mut impl Output) {
debug!(
" write inline len {} expected size {} of {}",
self.inline.len(),
self.size % 4096,
self.size
);
self.write_block(output, &self.inline);
}
fn write_blocks(&self, output: &mut impl Output) {
for block in &self.blocks {
assert_eq!(output.len() % format::BLOCK_SIZE, 0);
self.write_block(output, block);
output.pad(format::BLOCK_SIZE);
}
}
fn inode_meta(&self, block_offset: usize) -> (format::DataLayout, u32, u64, usize) {
let (layout, u) = if self.inline.is_empty() {
(format::DataLayout::FlatPlain, block_offset as u32 / 4096)
} else if !self.blocks.is_empty() {
(format::DataLayout::FlatInline, block_offset as u32 / 4096)
} else {
(format::DataLayout::FlatInline, 0)
};
(layout, u, self.size as u64, self.nlink)
}
}
impl<ObjectID: FsVerityHashValue> Leaf<'_, ObjectID> {
fn inode_meta(&self) -> (format::DataLayout, u32, u64, usize) {
let (layout, u, size) = match &self.content {
tree::LeafContent::Regular(tree::RegularFile::Inline(data)) => {
if data.is_empty() {
(format::DataLayout::FlatPlain, 0, data.len() as u64)
} else {
(format::DataLayout::FlatInline, 0, data.len() as u64)
}
}
tree::LeafContent::Regular(tree::RegularFile::External(.., size)) => {
(format::DataLayout::ChunkBased, 31, *size)
}
tree::LeafContent::CharacterDevice(rdev) | tree::LeafContent::BlockDevice(rdev) => {
(format::DataLayout::FlatPlain, *rdev as u32, 0)
}
tree::LeafContent::Fifo | tree::LeafContent::Socket => {
(format::DataLayout::FlatPlain, 0, 0)
}
tree::LeafContent::Symlink(target) => {
(format::DataLayout::FlatInline, 0, target.len() as u64)
}
};
(layout, u, size, self.nlink)
}
fn write_inline(&self, output: &mut impl Output) {
output.write(match self.content {
tree::LeafContent::Regular(tree::RegularFile::Inline(data)) => data,
tree::LeafContent::Regular(tree::RegularFile::External(..)) => b"\xff\xff\xff\xff", tree::LeafContent::Symlink(target) => target.as_bytes(),
_ => &[],
});
}
}
impl<ObjectID: FsVerityHashValue> Inode<'_, ObjectID> {
fn file_type(&self) -> format::FileType {
match &self.content {
InodeContent::Directory(..) => format::FileType::Directory,
InodeContent::Leaf(leaf) => match &leaf.content {
tree::LeafContent::Regular(..) => format::FileType::RegularFile,
tree::LeafContent::CharacterDevice(..) => format::FileType::CharacterDevice,
tree::LeafContent::BlockDevice(..) => format::FileType::BlockDevice,
tree::LeafContent::Fifo => format::FileType::Fifo,
tree::LeafContent::Socket => format::FileType::Socket,
tree::LeafContent::Symlink(..) => format::FileType::Symlink,
},
}
}
fn write_inode(&self, output: &mut impl Output, idx: usize) {
let (layout, u, size, nlink) = match &self.content {
InodeContent::Directory(dir) => dir.inode_meta(output.get(Offset::Block, idx)),
InodeContent::Leaf(leaf) => leaf.inode_meta(),
};
let xattr_size = {
let mut xattr = FirstPass::default();
self.xattrs.write(&mut xattr);
xattr.offset
};
output.pad(32);
if matches!(layout, format::DataLayout::FlatInline) {
let inode_and_xattr_size = size_of::<format::ExtendedInodeHeader>() + xattr_size;
let inline_start = output.len() + inode_and_xattr_size;
let end_of_metadata = inline_start - 1;
let inline_end = inline_start + (size as usize % format::BLOCK_SIZE);
if end_of_metadata / format::BLOCK_SIZE != inline_end / format::BLOCK_SIZE {
let pad = vec![0; 4096 - end_of_metadata % 4096];
debug!("added pad {}", pad.len());
output.write(&pad);
output.pad(32);
}
}
let format = format::InodeLayout::Extended | layout;
debug!(
"write inode {idx} nid {} {:?} {:?} xattrsize{xattr_size} icount{} inline{} @{}",
output.len() / 32,
format,
self.file_type(),
match xattr_size {
0 => 0,
n => (1 + (n - 12) / 4) as u16,
},
size % 4096,
output.len()
);
output.note_offset(Offset::Inode);
output.write_struct(format::ExtendedInodeHeader {
format,
xattr_icount: match xattr_size {
0 => 0,
n => (1 + (n - 12) / 4) as u16,
}
.into(),
mode: self.file_type() | self.stat.st_mode,
size: size.into(),
u: u.into(),
ino: ((output.len() / 32) as u32).into(),
uid: self.stat.st_uid.into(),
gid: self.stat.st_gid.into(),
mtime: (self.stat.st_mtim_sec as u64).into(),
nlink: (nlink as u32).into(),
..Default::default()
});
self.xattrs.write(output);
match &self.content {
InodeContent::Directory(dir) => dir.write_inline(output),
InodeContent::Leaf(leaf) => leaf.write_inline(output),
};
output.pad(32);
}
fn write_blocks(&self, output: &mut impl Output) {
if let InodeContent::Directory(dir) = &self.content {
dir.write_blocks(output);
}
}
}
struct InodeCollector<'a, ObjectID: FsVerityHashValue> {
inodes: Vec<Inode<'a, ObjectID>>,
hardlinks: HashMap<*const tree::Leaf<ObjectID>, usize>,
}
impl<'a, ObjectID: FsVerityHashValue> InodeCollector<'a, ObjectID> {
fn push_inode(&mut self, stat: &'a tree::Stat, content: InodeContent<'a, ObjectID>) -> usize {
let mut xattrs = InodeXAttrs::default();
if let InodeContent::Leaf(Leaf {
content: tree::LeafContent::Regular(tree::RegularFile::External(id, ..)),
..
}) = content
{
xattrs.add(
b"trusted.overlay.metacopy",
OverlayMetacopy::new(id).as_bytes(),
);
let redirect = format!("/{}", id.to_object_pathname());
xattrs.add(b"trusted.overlay.redirect", redirect.as_bytes());
}
for (name, value) in RefCell::borrow(&stat.xattrs).iter() {
let name = name.as_bytes();
if let Some(escapee) = name.strip_prefix(b"trusted.overlay.") {
let escaped = [b"trusted.overlay.overlay.", escapee].concat();
xattrs.add(&escaped, value);
} else {
xattrs.add(name, value);
}
}
let inode = self.inodes.len();
self.inodes.push(Inode {
stat,
xattrs,
content,
});
inode
}
fn collect_leaf(&mut self, leaf: &'a Rc<tree::Leaf<ObjectID>>) -> usize {
let nlink = Rc::strong_count(leaf);
if nlink > 1 {
if let Some(inode) = self.hardlinks.get(&Rc::as_ptr(leaf)) {
return *inode;
}
}
let inode = self.push_inode(
&leaf.stat,
InodeContent::Leaf(Leaf {
content: &leaf.content,
nlink,
}),
);
if nlink > 1 {
self.hardlinks.insert(Rc::as_ptr(leaf), inode);
}
inode
}
fn insert_sorted(
entries: &mut Vec<DirEnt<'a>>,
name: &'a [u8],
inode: usize,
file_type: format::FileType,
) {
let entry = DirEnt {
name,
inode,
file_type,
};
let point = entries.partition_point(|e| e.name < entry.name);
entries.insert(point, entry);
}
fn collect_dir(&mut self, dir: &'a tree::Directory<ObjectID>, parent: usize) -> usize {
let me = self.push_inode(&dir.stat, InodeContent::Directory(Directory::default()));
let mut entries = vec![];
for (name, inode) in dir.sorted_entries() {
let child = match inode {
tree::Inode::Directory(dir) => self.collect_dir(dir, me),
tree::Inode::Leaf(leaf) => self.collect_leaf(leaf),
};
entries.push(DirEnt {
name: name.as_bytes(),
inode: child,
file_type: self.inodes[child].file_type(),
});
}
Self::insert_sorted(&mut entries, b".", me, format::FileType::Directory);
Self::insert_sorted(&mut entries, b"..", parent, format::FileType::Directory);
self.inodes[me].content = InodeContent::Directory(Directory::from_entries(entries));
me
}
pub fn collect(fs: &'a tree::FileSystem<ObjectID>) -> Vec<Inode<'a, ObjectID>> {
let mut this = Self {
inodes: vec![],
hardlinks: HashMap::new(),
};
let root_inode = this.collect_dir(&fs.root, 0);
assert_eq!(root_inode, 0);
this.inodes
}
}
fn share_xattrs(inodes: &mut [Inode<impl FsVerityHashValue>]) -> Vec<XAttr> {
let mut xattrs: BTreeMap<XAttr, usize> = BTreeMap::new();
for inode in inodes.iter() {
for attr in &inode.xattrs.local {
if let Some(count) = xattrs.get_mut(attr) {
*count += 1;
} else {
xattrs.insert(attr.clone(), 1);
}
}
}
xattrs.retain(|_k, v| *v > 1);
for (idx, value) in xattrs.values_mut().enumerate() {
*value = idx;
}
for inode in inodes.iter_mut() {
inode.xattrs.local.retain(|attr| {
if let Some(idx) = xattrs.get(attr) {
inode.xattrs.shared.push(*idx);
false } else {
true }
});
}
xattrs.into_keys().collect()
}
fn write_erofs(
output: &mut impl Output,
inodes: &[Inode<impl FsVerityHashValue>],
xattrs: &[XAttr],
) {
output.note_offset(Offset::Header);
output.write_struct(format::ComposefsHeader {
magic: format::COMPOSEFS_MAGIC,
version: format::VERSION,
flags: 0.into(),
composefs_version: format::COMPOSEFS_VERSION,
..Default::default()
});
output.pad(1024);
output.note_offset(Offset::Superblock);
output.write_struct(format::Superblock {
magic: format::MAGIC_V1,
blkszbits: format::BLOCK_BITS,
feature_compat: format::FEATURE_COMPAT_MTIME | format::FEATURE_COMPAT_XATTR_FILTER,
root_nid: (output.get_nid(0) as u16).into(),
inos: (inodes.len() as u64).into(),
blocks: ((output.get(Offset::End, 0) / format::BLOCK_SIZE) as u32).into(),
..Default::default()
});
for (idx, inode) in inodes.iter().enumerate() {
inode.write_inode(output, idx);
}
for xattr in xattrs {
output.note_offset(Offset::XAttr);
xattr.write(output);
}
output.pad(4096);
for inode in inodes.iter() {
output.note_offset(Offset::Block);
inode.write_blocks(output);
}
output.note_offset(Offset::End);
}
#[derive(Default)]
struct Layout {
offset_types: Vec<usize>,
offsets: Vec<usize>,
}
#[derive(Default)]
struct FirstPass {
offset: usize,
layout: Layout,
}
struct SecondPass {
output: Vec<u8>,
layout: Layout,
}
impl Output for SecondPass {
fn note_offset(&mut self, _offset_type: Offset) {
}
fn get(&self, offset_type: Offset, idx: usize) -> usize {
let start = self.layout.offset_types[offset_type as usize];
self.layout.offsets[start + idx]
}
fn write(&mut self, data: &[u8]) {
self.output.extend_from_slice(data);
}
fn pad(&mut self, alignment: usize) {
self.output
.resize(round_up(self.output.len(), alignment), 0);
}
fn len(&self) -> usize {
self.output.len()
}
}
impl Output for FirstPass {
fn note_offset(&mut self, offset_type: Offset) {
while self.layout.offset_types.len() <= offset_type as usize {
self.layout.offset_types.push(self.layout.offsets.len());
}
assert_eq!(self.layout.offset_types.len(), offset_type as usize + 1);
debug!(
"{:?} #{} @{}",
offset_type,
self.layout.offsets.len() - self.layout.offset_types[offset_type as usize],
self.offset
);
self.layout.offsets.push(self.offset);
}
fn get(&self, _: Offset, _: usize) -> usize {
0 }
fn write(&mut self, data: &[u8]) {
self.offset += data.len();
}
fn pad(&mut self, alignment: usize) {
self.offset = round_up(self.offset, alignment);
}
fn len(&self) -> usize {
self.offset
}
}
pub fn mkfs_erofs<ObjectID: FsVerityHashValue>(fs: &tree::FileSystem<ObjectID>) -> Box<[u8]> {
let mut inodes = InodeCollector::collect(fs);
let xattrs = share_xattrs(&mut inodes);
let mut first_pass = FirstPass::default();
write_erofs(&mut first_pass, &inodes, &xattrs);
let mut second_pass = SecondPass {
output: vec![],
layout: first_pass.layout,
};
write_erofs(&mut second_pass, &inodes, &xattrs);
second_pass.output.into_boxed_slice()
}