extern crate alloc;
use alloc::boxed::Box;
use alloc::collections::BTreeMap;
use alloc::vec;
use alloc::vec::Vec;
use core::sync::atomic::{AtomicU64, Ordering};
use crate::fscore::structs::{
BLKPTR_SIZE, Blkptr, DmuObjectType, DnodePhys, Hyperblock, LCPFS_MAGIC,
};
use crate::io::pipeline::Pipeline;
use crate::mgmt::format::LcpfsFormatter;
use crate::{FsError, FsResult};
/// Default logical block size for new objects (128 KiB).
pub const DMU_DEFAULT_BLOCK_SIZE: u64 = 128 * 1024;
#[deprecated(
since = "2026.1.100",
note = "use DMU_DEFAULT_BLOCK_SIZE or get_block_size() instead"
)]
pub const DMU_BLOCK_SIZE: u64 = DMU_DEFAULT_BLOCK_SIZE;
/// Smallest block size accepted by `set_default_block_size`.
pub const DMU_MIN_BLOCKSIZE: u64 = 512;
/// Largest block size accepted by `set_default_block_size` (16 MiB).
pub const DMU_MAX_BLOCKSIZE: u64 = 16 * 1024 * 1024;
// Process-wide configured block size; read via get_block_size().
static CONFIGURED_BLOCK_SIZE: AtomicU64 = AtomicU64::new(DMU_DEFAULT_BLOCK_SIZE);
/// Validates and installs a new global default block size.
///
/// The size must be a power of two within
/// [`DMU_MIN_BLOCKSIZE`, `DMU_MAX_BLOCKSIZE`]; otherwise a static error
/// message is returned and the configured value is left untouched.
pub fn set_default_block_size(size: u64) -> Result<(), &'static str> {
    match size {
        s if !s.is_power_of_two() => Err("Block size must be a power of 2"),
        s if s < DMU_MIN_BLOCKSIZE => Err("Block size must be at least 512 bytes"),
        s if s > DMU_MAX_BLOCKSIZE => Err("Block size must be at most 16 MiB"),
        s => {
            CONFIGURED_BLOCK_SIZE.store(s, Ordering::SeqCst);
            Ok(())
        }
    }
}
/// Returns the currently configured default block size.
pub fn get_block_size() -> u64 {
    CONFIGURED_BLOCK_SIZE.load(Ordering::SeqCst)
}
/// Maximum length of a single DMU read/write access (64 MiB).
pub const DMU_MAX_ACCESS: u64 = 64 * 1024 * 1024;
// Number of block pointers that fit in one 4 KiB indirect block.
const PTRS_PER_BLOCK: u64 = 4096 / BLKPTR_SIZE as u64;
/// Sentinel object id meaning "allocate a new object".
pub const DMU_NEW_OBJECT: u64 = u64::MAX;
/// Sentinel meaning "to the end of the object" in range operations.
pub const DMU_OBJECT_END: u64 = u64::MAX;
/// Object id reserved for the meta-dnode.
pub const DMU_META_DNODE_OBJECT: u64 = 0;
// NOTE(review): the two constants below are not referenced anywhere in this
// file — presumably reserved for future dnode packing / batched allocation.
const DNODES_PER_BLOCK: u64 = 8;
const OBJECT_ALLOC_CHUNK_SIZE: u64 = 128;
/// An in-memory data buffer attached to an object: a byte range plus
/// dirty-tracking state used at TXG sync time.
#[derive(Debug)]
pub struct DmuBuf {
    pub db_object: u64,
    pub db_offset: u64,
    pub db_size: u64,
    pub db_data: Vec<u8>,
    pub db_dirty: bool,
    pub db_dirty_txg: u64,
}
impl DmuBuf {
    /// Creates a zero-filled buffer of `size` bytes for `object` at `offset`.
    pub fn new(object: u64, offset: u64, size: u64) -> Self {
        Self::with_data(object, offset, vec![0u8; size as usize])
    }
    /// Wraps an existing byte vector; the buffer size is the vector length.
    pub fn with_data(object: u64, offset: u64, data: Vec<u8>) -> Self {
        Self {
            db_object: object,
            db_offset: offset,
            db_size: data.len() as u64,
            db_data: data,
            db_dirty: false,
            db_dirty_txg: 0,
        }
    }
    /// Flags the buffer as modified in transaction group `txg`.
    pub fn mark_dirty(&mut self, txg: u64) {
        self.db_dirty = true;
        self.db_dirty_txg = txg;
    }
}
/// Kind of reservation a transaction takes on an object.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TxHoldType {
    Write,
    Free,
    Zap,
    Bonus,
    SpaceMap,
}
/// One recorded hold: the target object, operation kind, affected byte
/// range, and the space reserved for the operation.
#[derive(Debug)]
pub struct DmuTxHold {
    pub txh_object: u64,
    pub txh_type: TxHoldType,
    pub txh_offset: u64,
    pub txh_length: u64,
    // Bytes reserved from free space for this hold (0 for frees).
    pub txh_space_towrite: u64,
}
/// Lifecycle states of a [`DmuTx`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TxState {
    Open,
    Assigned,
    Committed,
    Aborted,
}
/// A DMU transaction: accumulates holds while `Open`, is bound to a TXG by
/// `assign`, then consumed by `commit` or `abort`.
pub struct DmuTx {
    tx_objset_id: u64,
    tx_state: TxState,
    // TXG this transaction was assigned to; 0 until `assign` succeeds.
    tx_txg: u64,
    tx_holds: Vec<DmuTxHold>,
    // Sum of txh_space_towrite over all holds, checked against free space.
    tx_space_towrite: u64,
    tx_start_time: u64,
}
/// How `assign` should behave when the pool is busy or low on space.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TxWaitType {
    Wait,
    NoWait,
    NoThrottle,
}
impl DmuTx {
pub fn create(objset_id: u64) -> Self {
Self {
tx_objset_id: objset_id,
tx_state: TxState::Open,
tx_txg: 0,
tx_holds: Vec::new(),
tx_space_towrite: 0,
tx_start_time: crate::get_time(),
}
}
pub fn hold_write(&mut self, object: u64, offset: u64, length: u64) -> FsResult<()> {
if length > DMU_MAX_ACCESS {
return Err(FsError::InvalidArgument {
reason: "write length exceeds DMU_MAX_ACCESS",
});
}
let block_size = get_block_size();
let space_needed = length + (length / block_size + 1) * BLKPTR_SIZE as u64;
self.tx_holds.push(DmuTxHold {
txh_object: object,
txh_type: TxHoldType::Write,
txh_offset: offset,
txh_length: length,
txh_space_towrite: space_needed,
});
self.tx_space_towrite += space_needed;
Ok(())
}
pub fn hold_free(&mut self, object: u64, offset: u64, length: u64) -> FsResult<()> {
self.tx_holds.push(DmuTxHold {
txh_object: object,
txh_type: TxHoldType::Free,
txh_offset: offset,
txh_length: length,
txh_space_towrite: 0, });
Ok(())
}
pub fn hold_bonus(&mut self, object: u64) -> FsResult<()> {
self.tx_holds.push(DmuTxHold {
txh_object: object,
txh_type: TxHoldType::Bonus,
txh_offset: 0,
txh_length: 0,
txh_space_towrite: BLKPTR_SIZE as u64,
});
self.tx_space_towrite += BLKPTR_SIZE as u64;
Ok(())
}
pub fn hold_zap(&mut self, object: u64) -> FsResult<()> {
let space_needed = get_block_size();
self.tx_holds.push(DmuTxHold {
txh_object: object,
txh_type: TxHoldType::Zap,
txh_offset: 0,
txh_length: 0,
txh_space_towrite: space_needed,
});
self.tx_space_towrite += space_needed;
Ok(())
}
pub fn assign(&mut self, objset: &Objset, _wait: TxWaitType) -> FsResult<()> {
if self.tx_state != TxState::Open {
return Err(FsError::InvalidArgument {
reason: "transaction not in open state",
});
}
if self.tx_space_towrite > 0 {
let allocator = crate::util::alloc::ALLOCATOR.lock();
let available = allocator.total_free;
drop(allocator);
if self.tx_space_towrite > available {
return Err(FsError::DiskFull {
needed_bytes: self.tx_space_towrite,
});
}
}
self.tx_txg = objset.os_txg.load(Ordering::Acquire);
self.tx_state = TxState::Assigned;
Ok(())
}
pub fn commit(mut self) {
if self.tx_state == TxState::Assigned {
self.tx_state = TxState::Committed;
}
}
pub fn abort(mut self) {
if self.tx_state == TxState::Open {
self.tx_state = TxState::Aborted;
}
}
pub fn txg(&self) -> u64 {
self.tx_txg
}
pub fn is_assigned(&self) -> bool {
self.tx_state == TxState::Assigned
}
}
/// In-core dnode: the on-disk `DnodePhys` plus runtime bookkeeping
/// (hold count, dirty state, and any attached data buffers).
pub struct Dnode {
    pub dn_object: u64,
    pub dn_phys: DnodePhys,
    // Reference count taken via hold()/rele().
    pub dn_holds: AtomicU64,
    pub dn_dirty: bool,
    pub dn_dirty_txg: u64,
    // Cached data buffers keyed by offset.
    pub dn_dbufs: BTreeMap<u64, DmuBuf>,
}
impl Dnode {
    /// Wraps an on-disk dnode image in fresh in-core state.
    pub fn new(object: u64, phys: DnodePhys) -> Self {
        Self {
            dn_object: object,
            dn_phys: phys,
            dn_holds: AtomicU64::new(0),
            dn_dirty: false,
            dn_dirty_txg: 0,
            dn_dbufs: BTreeMap::new(),
        }
    }
    /// Builds a brand-new dnode of the given type with a single direct
    /// block pointer. The blocksize/bonus parameters are currently unused.
    pub fn allocate(
        object: u64,
        object_type: DmuObjectType,
        _blocksize: u64,
        _bonustype: DmuObjectType,
        _bonuslen: u8,
    ) -> Self {
        let mut phys = DnodePhys::zero();
        phys.object_type = object_type as u8;
        phys.nblkptr = 1;
        Self::new(object, phys)
    }
    /// Takes a reference on this dnode.
    pub fn hold(&self) {
        self.dn_holds.fetch_add(1, Ordering::AcqRel);
    }
    /// Drops a reference; returns the count remaining after the release.
    pub fn rele(&self) -> u64 {
        let previous = self.dn_holds.fetch_sub(1, Ordering::AcqRel);
        previous - 1
    }
    /// Marks the dnode dirty in transaction group `txg`.
    pub fn set_dirty(&mut self, txg: u64) {
        self.dn_dirty = true;
        self.dn_dirty_txg = txg;
    }
    /// Decodes the on-disk type byte; unknown values map to `None`.
    pub fn object_type(&self) -> DmuObjectType {
        match self.dn_phys.object_type {
            17 => DmuObjectType::Znode,
            19 => DmuObjectType::PlainFileContents,
            20 => DmuObjectType::DirectoryContents,
            _ => DmuObjectType::None,
        }
    }
}
/// An object set: the collection of dnodes rooted at `os_rootbp`, with
/// object-id allocation state and an optional per-set encryption key.
pub struct Objset {
    pub os_id: u64,
    // Root block pointer of the serialized meta-dnode, updated at sync.
    pub os_rootbp: Blkptr,
    // Currently open transaction group number.
    pub os_txg: AtomicU64,
    pub os_dnodes: BTreeMap<u64, Dnode>,
    // Next never-used object id.
    os_next_object: AtomicU64,
    // Recycled object ids, popped before os_next_object is advanced.
    os_free_objects: Vec<u64>,
    // NOTE(review): appears unused by the visible code — confirm.
    os_alloc_next: AtomicU64,
    pub os_readonly: bool,
    os_encryption_key: Option<crate::crypto::aesni::EncryptionKey>,
}
impl Objset {
/// Creates an empty, writable object set with TXG 1 and no encryption key.
pub fn new(id: u64) -> Self {
    Self {
        os_id: id,
        os_rootbp: Blkptr::zero(),
        os_txg: AtomicU64::new(1),
        os_dnodes: BTreeMap::new(),
        os_next_object: AtomicU64::new(1), os_free_objects: Vec::new(),
        os_alloc_next: AtomicU64::new(1),
        os_readonly: false,
        os_encryption_key: None,
    }
}
/// Installs the encryption key used by read/write/sync paths.
pub fn set_encryption_key(&mut self, key: crate::crypto::aesni::EncryptionKey) {
    self.os_encryption_key = Some(key);
}
/// Returns the installed encryption key, if any.
pub fn get_encryption_key(&self) -> Option<&crate::crypto::aesni::EncryptionKey> {
    self.os_encryption_key.as_ref()
}
/// Removes the encryption key (subsequent I/O uses the all-zero key).
pub fn clear_encryption_key(&mut self) {
    self.os_encryption_key = None;
}
/// Allocates a new object in this set under an assigned transaction,
/// preferring recycled ids over advancing the id counter. Returns the id.
pub fn object_alloc(
    &mut self,
    object_type: DmuObjectType,
    blocksize: u64,
    bonustype: DmuObjectType,
    bonuslen: u8,
    tx: &DmuTx,
) -> FsResult<u64> {
    if !tx.is_assigned() {
        return Err(FsError::InvalidArgument {
            reason: "transaction not assigned",
        });
    }
    // Reuse a freed id when available; otherwise take a fresh one.
    let object_id = match self.os_free_objects.pop() {
        Some(recycled) => recycled,
        None => self.os_next_object.fetch_add(1, Ordering::AcqRel),
    };
    let mut node = Dnode::allocate(object_id, object_type, blocksize, bonustype, bonuslen);
    node.set_dirty(tx.txg());
    self.os_dnodes.insert(object_id, node);
    Ok(object_id)
}
/// Claims a specific object id (e.g. during replay), creating its dnode.
///
/// Fails with `AlreadyExists` if the id is in use, and — consistent with
/// `object_alloc`/`object_free` — with `InvalidArgument` if the
/// transaction has not been assigned to a TXG. Advances the next-object
/// counter past the claimed id so future allocations cannot collide.
pub fn object_claim(
    &mut self,
    object: u64,
    object_type: DmuObjectType,
    blocksize: u64,
    bonustype: DmuObjectType,
    bonuslen: u8,
    tx: &DmuTx,
) -> FsResult<()> {
    // BUGFIX: previously this method skipped the assigned-tx check that
    // every other mutating entry point performs, allowing a dirty dnode
    // to be tagged with txg 0.
    if !tx.is_assigned() {
        return Err(FsError::InvalidArgument {
            reason: "transaction not assigned",
        });
    }
    if self.os_dnodes.contains_key(&object) {
        return Err(FsError::AlreadyExists);
    }
    let mut dnode = Dnode::allocate(object, object_type, blocksize, bonustype, bonuslen);
    dnode.set_dirty(tx.txg());
    self.os_dnodes.insert(object, dnode);
    // Keep the id allocator ahead of any explicitly claimed id.
    let next = self.os_next_object.load(Ordering::Acquire);
    if object >= next {
        self.os_next_object.store(object + 1, Ordering::Release);
    }
    Ok(())
}
/// Frees `object` under an assigned transaction: its type is reset to
/// `None`, it is marked dirty, and its id becomes available for reuse.
pub fn object_free(&mut self, object: u64, tx: &DmuTx) -> FsResult<()> {
    if !tx.is_assigned() {
        return Err(FsError::InvalidArgument {
            reason: "transaction not assigned",
        });
    }
    let Some(dnode) = self.os_dnodes.get_mut(&object) else {
        return Err(FsError::NotFound);
    };
    dnode.dn_phys.object_type = DmuObjectType::None as u8;
    dnode.set_dirty(tx.txg());
    self.os_free_objects.push(object);
    Ok(())
}
/// Looks up `object`, takes a hold on its dnode, and returns it mutably.
pub fn dnode_hold(&mut self, object: u64) -> FsResult<&mut Dnode> {
    match self.os_dnodes.get_mut(&object) {
        Some(dn) => {
            dn.hold();
            Ok(dn)
        }
        None => Err(FsError::NotFound),
    }
}
/// Releases one hold on `object`'s dnode; silently ignores unknown ids.
pub fn dnode_rele(&self, object: u64) {
    if let Some(dn) = self.os_dnodes.get(&object) {
        dn.rele();
    }
}
/// Returns summary metadata for `object`, or `NotFound` if absent.
///
/// NOTE(review): sizes reported here are based on the configurable
/// `get_block_size()`, while the actual read/write paths in this module
/// use a hard-coded 4096-byte block — confirm which one `doi_max_offset`
/// and `doi_data_block_size` should reflect.
pub fn object_info(&self, object: u64) -> FsResult<ObjectInfo> {
    if let Some(dnode) = self.os_dnodes.get(&object) {
        let block_size = get_block_size();
        Ok(ObjectInfo {
            doi_type: dnode.object_type(),
            // Bonus buffers are not tracked yet; report empty.
            doi_bonus_type: DmuObjectType::None,
            doi_bonus_size: 0,
            doi_indirection: dnode.dn_phys.indirection_levels,
            doi_data_block_size: block_size as u32,
            // Logical bytes used, expressed in 512-byte sectors (rounded down).
            doi_physical_blocks_512: (dnode.dn_phys.used_bytes / 512),
            doi_max_offset: dnode.dn_phys.max_blkid * block_size,
        })
    } else {
        Err(FsError::NotFound)
    }
}
/// Reads `length` bytes at `offset` from `object`, decrypting with the
/// set's key (or the all-zero key when no key is installed).
pub fn read(&mut self, object: u64, offset: u64, length: usize) -> FsResult<Vec<u8>> {
    let Some(dnode) = self.os_dnodes.get(&object) else {
        return Err(FsError::NotFound);
    };
    let key = match self.os_encryption_key.as_ref() {
        Some(k) => k.key,
        None => [0u8; 32],
    };
    Self::read_dnode_data_with_key(&dnode.dn_phys, offset, length, &key)
}
/// Writes `data` at `offset` into `object` under an assigned transaction,
/// encrypting with the set's key (or the all-zero key) and marking the
/// dnode dirty in the transaction's TXG.
pub fn write(&mut self, object: u64, offset: u64, data: &[u8], tx: &DmuTx) -> FsResult<()> {
    if !tx.is_assigned() {
        return Err(FsError::InvalidArgument {
            reason: "transaction not assigned",
        });
    }
    let txg = tx.txg();
    let key = match self.os_encryption_key.as_ref() {
        Some(k) => k.key,
        None => [0u8; 32],
    };
    let Some(dnode) = self.os_dnodes.get_mut(&object) else {
        return Err(FsError::NotFound);
    };
    Self::write_dnode_data_with_key(&mut dnode.dn_phys, offset, data, txg, &key)?;
    dnode.set_dirty(txg);
    Ok(())
}
/// Flushes all dirty dnodes for this object set.
///
/// NOTE(review): the `_txg` argument is ignored and device id 0 is always
/// passed to `txg_sync` — confirm this is intentional for multi-device
/// configurations.
pub fn sync(&mut self, _txg: u64) -> FsResult<()> {
    self.txg_sync(0)?;
    Ok(())
}
/// Convenience wrapper: read with the all-zero ("no encryption") key.
fn read_dnode_data(dnode: &DnodePhys, offset: u64, length: usize) -> FsResult<Vec<u8>> {
    Self::read_dnode_data_with_key(dnode, offset, length, &[0u8; 32])
}
/// Reads `length` bytes starting at byte `offset` from the dnode's block
/// tree, decrypting each 4 KiB block with `key`. Holes are returned as
/// zeros without touching the device.
fn read_dnode_data_with_key(
    dnode: &DnodePhys,
    offset: u64,
    length: usize,
    key: &[u8; 32],
) -> FsResult<Vec<u8>> {
    // Fixed on-disk leaf block size used by this module's I/O paths.
    const BLOCK_SIZE: u64 = 4096;
    if length == 0 {
        return Ok(Vec::new());
    }
    let start_block = offset / BLOCK_SIZE;
    let end_block = (offset + length as u64).div_ceil(BLOCK_SIZE);
    let mut result_buffer = Vec::with_capacity(length);
    for blk_idx in start_block..end_block {
        let l0_bp = Self::traverse_with_key(dnode, blk_idx, key)?;
        if l0_bp.is_hole() {
            // Sparse region: synthesize zeros.
            let zeros_needed =
                core::cmp::min(BLOCK_SIZE as usize, length - result_buffer.len());
            result_buffer.extend(core::iter::repeat_n(0u8, zeros_needed));
            continue;
        }
        let raw_block = Pipeline::read_block_auto_nonce(&l0_bp, key)?;
        // Only the first block may start mid-block.
        let block_offset = if blk_idx == start_block {
            offset % BLOCK_SIZE
        } else {
            0
        };
        // BUGFIX: use saturating_sub so a short (truncated) physical block
        // cannot underflow the subtraction (debug panic / release wrap).
        let copy_len = core::cmp::min(
            (raw_block.len() as u64).saturating_sub(block_offset),
            length as u64 - result_buffer.len() as u64,
        ) as usize;
        let start = block_offset as usize;
        let end = start + copy_len;
        if end <= raw_block.len() {
            result_buffer.extend_from_slice(&raw_block[start..end]);
        }
    }
    Ok(result_buffer)
}
/// Convenience wrapper: traverse with the all-zero ("no encryption") key.
fn traverse(dnode: &DnodePhys, logical_block_index: u64) -> FsResult<Blkptr> {
    Self::traverse_with_key(dnode, logical_block_index, &[0u8; 32])
}
/// Walks the indirect-block tree to find the level-0 block pointer for
/// `logical_block_index`. Returns a zeroed (hole) pointer for unmapped
/// ranges, and `Corruption` if an indirect block is too short.
fn traverse_with_key(
    dnode: &DnodePhys,
    logical_block_index: u64,
    key: &[u8; 32],
) -> FsResult<Blkptr> {
    let level = dnode.indirection_levels;
    // Level 0: block pointers live directly in the dnode.
    if level == 0 {
        if logical_block_index >= dnode.nblkptr as u64 {
            return Ok(Blkptr::zero());
        }
        return Ok(dnode.blkptr[logical_block_index as usize]);
    }
    // Each top-level pointer covers PTRS_PER_BLOCK^level leaf blocks.
    // (Previously levels 1-3 had hand-expanded arms identical to this
    // general formula; they have been collapsed.)
    let blocks_per_top_ptr = PTRS_PER_BLOCK.pow(level as u32);
    let top_bp_idx = (logical_block_index / blocks_per_top_ptr) as usize;
    let remaining_index = logical_block_index % blocks_per_top_ptr;
    if top_bp_idx >= dnode.nblkptr as usize {
        return Ok(Blkptr::zero());
    }
    let mut current_bp = dnode.blkptr[top_bp_idx];
    let mut current_index = remaining_index;
    let mut current_level = level;
    // Descend one indirection level per iteration until level 0.
    while current_level > 0 {
        if current_bp.is_hole() {
            return Ok(Blkptr::zero());
        }
        let indirect_data = Pipeline::read_block_auto_nonce(&current_bp, key)?;
        let divisor = PTRS_PER_BLOCK.pow((current_level - 1) as u32);
        let ptr_index = (current_index / divisor) as usize;
        current_index %= divisor;
        let ptr_offset = ptr_index * BLKPTR_SIZE;
        if ptr_offset + BLKPTR_SIZE > indirect_data.len() {
            return Err(FsError::Corruption {
                block: logical_block_index,
                details: "Indirect block truncated",
            });
        }
        let ptr_slice = &indirect_data[ptr_offset..ptr_offset + BLKPTR_SIZE];
        // SAFETY: the slice is exactly BLKPTR_SIZE bytes (checked above);
        // Blkptr is the plain-old-data on-disk layout, read unaligned.
        current_bp = unsafe { core::ptr::read_unaligned(ptr_slice.as_ptr() as *const Blkptr) };
        current_level -= 1;
    }
    Ok(current_bp)
}
/// Convenience wrapper: write with the all-zero ("no encryption") key.
fn write_dnode_data(dnode: &mut DnodePhys, offset: u64, data: &[u8], txg: u64) -> FsResult<()> {
    Self::write_dnode_data_with_key(dnode, offset, data, txg, &[0u8; 32])
}
/// Writes `data` at byte `offset` into the dnode's block tree using `key`,
/// growing the indirection tree as needed and performing read-modify-write
/// for partially overwritten blocks. Updates `used_bytes` and `max_blkid`.
fn write_dnode_data_with_key(
    dnode: &mut DnodePhys,
    offset: u64,
    data: &[u8],
    txg: u64,
    key: &[u8; 32],
) -> FsResult<()> {
    // Fixed on-disk leaf block size used by this module's I/O paths.
    const BLOCK_SIZE: u64 = 4096;
    if data.is_empty() {
        return Ok(());
    }
    let start_block = offset / BLOCK_SIZE;
    let end_block = (offset + data.len() as u64).div_ceil(BLOCK_SIZE);
    let max_block_idx = end_block - 1;
    // Ensure the tree is deep enough to address the last block touched.
    let required_level = Self::required_indirection_level(max_block_idx);
    if required_level > dnode.indirection_levels {
        Self::grow_indirection(dnode, required_level, txg)?;
    }
    let mut data_offset = 0usize;
    for blk_idx in start_block..end_block {
        // Only the first block may start mid-block.
        let block_offset = if blk_idx == start_block {
            (offset % BLOCK_SIZE) as usize
        } else {
            0
        };
        let copy_len =
            core::cmp::min(BLOCK_SIZE as usize - block_offset, data.len() - data_offset);
        let mut block_data = [0u8; 4096];
        // Partial overwrite: preserve existing bytes (read-modify-write).
        if block_offset > 0 || copy_len < BLOCK_SIZE as usize {
            match Self::traverse_with_key(dnode, blk_idx, key) {
                Ok(existing_bp) => {
                    if !existing_bp.is_hole() {
                        let existing_data = Pipeline::read_block_auto_nonce(&existing_bp, key)
                            .map_err(|e| {
                                crate::lcpfs_println!(
                                    "[ DMU ] ERROR: Read-modify-write failed - cannot read \
                                    existing block {} for partial write: {:?}",
                                    blk_idx,
                                    e
                                );
                                e
                            })?;
                        let copy_size = core::cmp::min(existing_data.len(), 4096);
                        block_data[..copy_size].copy_from_slice(&existing_data[..copy_size]);
                    }
                }
                Err(_) => {
                    // Traversal failure is treated as a hole: start from zeros.
                }
            }
        }
        block_data[block_offset..block_offset + copy_len]
            .copy_from_slice(&data[data_offset..data_offset + copy_len]);
        // Persist the (possibly merged) block and splice its new pointer
        // into the tree copy-on-write style.
        let new_bp = Pipeline::write_block_full(&block_data, key, txg)?;
        Self::update_block_pointer(dnode, blk_idx, new_bp, txg)?;
        data_offset += copy_len;
    }
    // Track logical-size high-water marks.
    let new_size = offset + data.len() as u64;
    if new_size > dnode.used_bytes {
        dnode.used_bytes = new_size;
    }
    if max_block_idx > dnode.max_blkid {
        dnode.max_blkid = max_block_idx;
    }
    Ok(())
}
/// Returns the smallest indirection depth able to address `block_idx`.
/// The dnode holds 3 direct pointers; each indirection level multiplies
/// the addressable range by the number of pointers per indirect block.
fn required_indirection_level(block_idx: u64) -> u8 {
    const PTRS_PER_INDIRECT: u64 = 4096 / BLKPTR_SIZE as u64;
    match block_idx {
        i if i < 3 => 0,
        i if i < 3 * PTRS_PER_INDIRECT => 1,
        i if i < 3 * PTRS_PER_INDIRECT * PTRS_PER_INDIRECT => 2,
        _ => 3,
    }
}
/// Raises the dnode's indirection depth to `target_level`, one level per
/// iteration, by pushing each populated top-level pointer down into a
/// freshly written indirect block whose first entry is the old pointer.
///
/// NOTE(review): writes here always use the all-zero key, unlike the data
/// paths that take the objset's encryption key — confirm indirect blocks
/// are meant to be written unencrypted/zero-keyed.
fn grow_indirection(dnode: &mut DnodePhys, target_level: u8, txg: u64) -> FsResult<()> {
    const BLOCK_SIZE: usize = 4096;
    let key = [0u8; 32];
    while dnode.indirection_levels < target_level {
        for i in 0..dnode.nblkptr as usize {
            let old_bp = dnode.blkptr[i];
            if old_bp.is_hole() {
                continue;
            }
            let mut indirect_block = [0u8; BLOCK_SIZE];
            // SAFETY: Blkptr is the plain-old-data on-disk layout; viewing
            // its BLKPTR_SIZE bytes serializes it into the indirect block.
            unsafe {
                let bp_bytes = core::slice::from_raw_parts(
                    &old_bp as *const Blkptr as *const u8,
                    BLKPTR_SIZE,
                );
                indirect_block[..BLKPTR_SIZE].copy_from_slice(bp_bytes);
            }
            let indirect_bp = Pipeline::write_block_full(&indirect_block, &key, txg)?;
            dnode.blkptr[i] = indirect_bp;
        }
        dnode.indirection_levels += 1;
    }
    Ok(())
}
/// Installs `new_bp` as the level-0 pointer for `block_idx`, rewriting the
/// whole indirect-block path copy-on-write style: read (or zero-fill) each
/// indirect block on the path, patch the child pointer, then write the
/// path back bottom-up so every parent references the freshly written
/// child. Fails with `DiskFull` when `block_idx` exceeds what the current
/// 3 top-level pointers can address.
///
/// NOTE(review): indirect blocks are read/written with the all-zero key
/// here — confirm this matches the on-disk format for encrypted sets.
fn update_block_pointer(
    dnode: &mut DnodePhys,
    block_idx: u64,
    new_bp: Blkptr,
    txg: u64,
) -> FsResult<()> {
    const BLOCK_SIZE: usize = 4096;
    const PTRS_PER_INDIRECT: u64 = (BLOCK_SIZE / BLKPTR_SIZE) as u64;
    let level = dnode.indirection_levels;
    let key = [0u8; 32];
    // Direct case: pointer slots live in the dnode itself (max 3).
    if level == 0 {
        if block_idx >= 3 {
            return Err(FsError::DiskFull {
                needed_bytes: block_idx * 4096,
            });
        }
        dnode.blkptr[block_idx as usize] = new_bp;
        if block_idx as u8 >= dnode.nblkptr {
            dnode.nblkptr = block_idx as u8 + 1;
        }
        return Ok(());
    }
    let blocks_per_top = PTRS_PER_INDIRECT.pow(level as u32);
    let top_idx = (block_idx / blocks_per_top) as usize;
    if top_idx >= 3 {
        return Err(FsError::DiskFull {
            needed_bytes: block_idx * 4096,
        });
    }
    // Pointer index to follow at each level, topmost first.
    let mut path_indices = Vec::new();
    let mut remaining = block_idx % blocks_per_top;
    for l in (1..=level).rev() {
        let divisor = PTRS_PER_INDIRECT.pow((l - 1) as u32);
        path_indices.push((remaining / divisor) as usize);
        remaining %= divisor;
    }
    // Descend, collecting a mutable in-memory copy of each indirect block
    // on the path (holes become zero-filled blocks).
    let mut indirect_blocks: Vec<Vec<u8>> = Vec::new();
    let mut current_bp = dnode.blkptr[top_idx];
    for depth in 0..level as usize {
        let block_data = if current_bp.is_hole() {
            vec![0u8; BLOCK_SIZE]
        } else {
            Pipeline::read_block_auto_nonce(&current_bp, &key)?
        };
        indirect_blocks.push(block_data);
        if depth < (level - 1) as usize {
            let idx = path_indices[depth];
            let ptr_offset = idx * BLKPTR_SIZE;
            if ptr_offset + BLKPTR_SIZE <= indirect_blocks[depth].len() {
                let ptr_slice = &indirect_blocks[depth][ptr_offset..ptr_offset + BLKPTR_SIZE];
                // SAFETY: slice is exactly BLKPTR_SIZE bytes; Blkptr is the
                // plain-old-data on-disk layout, read unaligned.
                current_bp =
                    unsafe { core::ptr::read_unaligned(ptr_slice.as_ptr() as *const Blkptr) };
            } else {
                current_bp = Blkptr::zero();
            }
        }
    }
    // Patch the new level-0 pointer into the leaf indirect block.
    let leaf_idx = path_indices[level as usize - 1];
    let ptr_offset = leaf_idx * BLKPTR_SIZE;
    // SAFETY: serializing the POD Blkptr into the leaf block; the target
    // range is ptr_offset..ptr_offset+BLKPTR_SIZE within a BLOCK_SIZE buffer.
    unsafe {
        let bp_bytes =
            core::slice::from_raw_parts(&new_bp as *const Blkptr as *const u8, BLKPTR_SIZE);
        let leaf = indirect_blocks.last_mut().unwrap();
        leaf[ptr_offset..ptr_offset + BLKPTR_SIZE].copy_from_slice(bp_bytes);
    }
    // Write the path back bottom-up: each parent gets the pointer of the
    // child block written in the previous iteration.
    let mut child_bp = Blkptr::zero();
    for depth in (0..level as usize).rev() {
        let mut block_data = indirect_blocks[depth].clone();
        if depth < (level - 1) as usize {
            let idx = path_indices[depth];
            let ptr_offset = idx * BLKPTR_SIZE;
            // SAFETY: same POD serialization as above.
            unsafe {
                let bp_bytes = core::slice::from_raw_parts(
                    &child_bp as *const Blkptr as *const u8,
                    BLKPTR_SIZE,
                );
                block_data[ptr_offset..ptr_offset + BLKPTR_SIZE].copy_from_slice(bp_bytes);
            }
        }
        let written_bp = Pipeline::write_block_full(&block_data, &key, txg)?;
        if depth == 0 {
            // Topmost: the new pointer goes into the dnode itself.
            dnode.blkptr[top_idx] = written_bp;
            if top_idx as u8 >= dnode.nblkptr {
                dnode.nblkptr = top_idx as u8 + 1;
            }
        } else {
            child_bp = written_bp;
        }
    }
    Ok(())
}
pub fn max_file_size(indirection_level: u8) -> u64 {
const BLOCK_SIZE: u64 = 4096;
match indirection_level {
0 => 3 * BLOCK_SIZE,
1 => 3 * PTRS_PER_BLOCK * BLOCK_SIZE,
2 => 3 * PTRS_PER_BLOCK * PTRS_PER_BLOCK * BLOCK_SIZE,
3 => 3 * PTRS_PER_BLOCK * PTRS_PER_BLOCK * PTRS_PER_BLOCK * BLOCK_SIZE,
_ => u64::MAX,
}
}
/// Syncs the current transaction group to device `dev_id`: writes every
/// dirty dnode, serializes all dnodes into a meta-dnode block, updates
/// `os_rootbp`, persists the hyperblock, and advances the open TXG.
/// Returns the TXG that was synced.
///
/// The whole sync fails (without advancing the TXG) if any dnode write,
/// the meta-dnode write, or the hyperblock write fails.
pub fn txg_sync(&mut self, dev_id: usize) -> FsResult<u64> {
    // (The parameter was previously named `_dev_id` despite being used.)
    let current_txg = self.os_txg.load(Ordering::Acquire);
    crate::lcpfs_println!(
        "[ TXG ] Syncing TXG {} ({} dnodes)",
        current_txg,
        self.os_dnodes.len()
    );
    // All-zero key doubles as "no encryption".
    let key = self
        .os_encryption_key
        .as_ref()
        .map(|k| k.key)
        .unwrap_or([0u8; 32]);
    // Phase 1: write out every dirty dnode, collecting per-object results.
    let mut write_results: Vec<(u64, Result<(), FsError>)> = Vec::new();
    for (object_id, dnode) in self.os_dnodes.iter() {
        if dnode.dn_dirty {
            // SAFETY: DnodePhys is the plain-old-data on-disk layout; we
            // view its bytes only for the duration of the write call.
            let dnode_bytes = unsafe {
                core::slice::from_raw_parts(
                    &dnode.dn_phys as *const DnodePhys as *const u8,
                    core::mem::size_of::<DnodePhys>(),
                )
            };
            let result = Pipeline::write_block(dnode_bytes, &key, current_txg).map(|_| ());
            write_results.push((*object_id, result));
        }
    }
    // Abort the sync if anything failed; dirty flags stay set for retry.
    let mut failed_objects: Vec<u64> = Vec::new();
    for (object_id, result) in &write_results {
        if result.is_err() {
            failed_objects.push(*object_id);
        }
    }
    if !failed_objects.is_empty() {
        crate::lcpfs_println!(
            "[ TXG ] FAILED: {} dnodes failed to sync: {:?}",
            failed_objects.len(),
            failed_objects
        );
        return Err(FsError::IoError {
            vdev: 0,
            reason: "txg_sync failed: one or more dnodes could not be written",
        });
    }
    // Phase 2: clear dirty state on everything that was written.
    let synced_count = write_results.len();
    for (object_id, _) in &write_results {
        if let Some(dnode) = self.os_dnodes.get_mut(object_id) {
            dnode.dn_dirty = false;
            dnode.dn_dirty_txg = 0;
        }
    }
    // Phase 3: serialize ALL dnodes (id + phys) into the meta-dnode block
    // and chain the new root through the hyperblock.
    if synced_count > 0 {
        let mut meta_block = Vec::new();
        let dnode_size = core::mem::size_of::<DnodePhys>();
        for (object_id, dnode) in self.os_dnodes.iter() {
            meta_block.extend_from_slice(&object_id.to_le_bytes());
            // SAFETY: same POD byte view as above.
            let dnode_bytes = unsafe {
                core::slice::from_raw_parts(
                    &dnode.dn_phys as *const DnodePhys as *const u8,
                    dnode_size,
                )
            };
            meta_block.extend_from_slice(dnode_bytes);
        }
        match Pipeline::write_block_full(&meta_block, &key, current_txg) {
            Ok(meta_bp) => {
                self.os_rootbp = meta_bp;
                crate::lcpfs_println!(
                    "[ TXG ] Updated rootbp to DVA {:x}",
                    meta_bp.dva[0].offset
                );
                let hb = Hyperblock {
                    magic: LCPFS_MAGIC,
                    version: 1,
                    txg: current_txg,
                    guid_sum: 0, timestamp: 0, rootbp: self.os_rootbp,
                };
                if let Err(e) = LcpfsFormatter::write_hyperblock(dev_id, &hb) {
                    crate::lcpfs_println!("[ TXG ] ERROR: Failed to write hyperblock: {}", e);
                    return Err(FsError::IoError {
                        vdev: dev_id,
                        reason: "Failed to write hyperblock after txg_sync",
                    });
                }
                crate::lcpfs_println!("[ TXG ] Hyperblock written for TXG {}", current_txg);
            }
            Err(e) => {
                crate::lcpfs_println!("[ TXG ] ERROR: Failed to write meta-dnode: {:?}", e);
                return Err(e);
            }
        }
    }
    // Phase 4: open the next transaction group.
    let next_txg = current_txg + 1;
    self.os_txg.store(next_txg, Ordering::Release);
    crate::lcpfs_println!(
        "[ TXG ] Synced {} dirty dnodes, advanced to TXG {}",
        synced_count,
        next_txg
    );
    Ok(current_txg)
}
}
/// Summary metadata about an object, as returned by `Objset::object_info`.
#[derive(Debug, Clone, Copy)]
pub struct ObjectInfo {
    pub doi_type: DmuObjectType,
    pub doi_bonus_type: DmuObjectType,
    pub doi_bonus_size: u32,
    // Indirection depth of the object's block tree.
    pub doi_indirection: u8,
    pub doi_data_block_size: u32,
    // Space used, expressed in 512-byte sectors.
    pub doi_physical_blocks_512: u64,
    pub doi_max_offset: u64,
}
/// Lifecycle states of a transaction group: open for new transactions,
/// quiescing (draining), syncing to disk, then committed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TxgState {
    Open,
    Quiescing,
    Syncing,
    Committed,
}
/// A single transaction group with its state and usage counters.
pub struct Txg {
    pub txg_id: u64,
    pub txg_state: TxgState,
    pub txg_tx_count: AtomicU64,
    pub txg_space_used: AtomicU64,
}
impl Txg {
    /// Creates transaction group `id` in the `Open` state with zeroed counters.
    pub fn new(id: u64) -> Self {
        Self {
            txg_id: id,
            txg_state: TxgState::Open,
            txg_tx_count: AtomicU64::new(0),
            txg_space_used: AtomicU64::new(0),
        }
    }
    /// True while the group still accepts new transactions.
    pub fn is_open(&self) -> bool {
        matches!(self.txg_state, TxgState::Open)
    }
    /// Transitions Open -> Quiescing (stop admitting transactions).
    pub fn quiesce(&mut self) {
        self.txg_state = TxgState::Quiescing;
    }
    /// Transitions to Syncing (writing to disk).
    pub fn sync_start(&mut self) {
        self.txg_state = TxgState::Syncing;
    }
    /// Transitions to Committed (durable on disk).
    pub fn commit(&mut self) {
        self.txg_state = TxgState::Committed;
    }
}
/// C-style convenience wrapper for [`DmuTx::create`].
pub fn dmu_tx_create(objset: &Objset) -> DmuTx {
    DmuTx::create(objset.os_id)
}
/// C-style convenience wrapper for [`DmuTx::hold_write`].
pub fn dmu_tx_hold_write(tx: &mut DmuTx, object: u64, offset: u64, length: u64) -> FsResult<()> {
    tx.hold_write(object, offset, length)
}
/// C-style convenience wrapper for [`DmuTx::hold_free`].
pub fn dmu_tx_hold_free(tx: &mut DmuTx, object: u64, offset: u64, length: u64) -> FsResult<()> {
    tx.hold_free(object, offset, length)
}
/// C-style convenience wrapper for [`DmuTx::assign`].
pub fn dmu_tx_assign(tx: &mut DmuTx, objset: &Objset, wait: TxWaitType) -> FsResult<()> {
    tx.assign(objset, wait)
}
/// C-style convenience wrapper for [`DmuTx::commit`] (consumes the tx).
pub fn dmu_tx_commit(tx: DmuTx) {
    tx.commit()
}
/// C-style convenience wrapper for [`DmuTx::abort`] (consumes the tx).
pub fn dmu_tx_abort(tx: DmuTx) {
    tx.abort()
}
/// C-style convenience wrapper for [`Objset::object_alloc`].
pub fn dmu_object_alloc(
    objset: &mut Objset,
    object_type: DmuObjectType,
    blocksize: u64,
    bonustype: DmuObjectType,
    bonuslen: u8,
    tx: &DmuTx,
) -> FsResult<u64> {
    objset.object_alloc(object_type, blocksize, bonustype, bonuslen, tx)
}
/// C-style convenience wrapper for [`Objset::object_free`].
pub fn dmu_object_free(objset: &mut Objset, object: u64, tx: &DmuTx) -> FsResult<()> {
    objset.object_free(object, tx)
}
/// C-style convenience wrapper for [`Objset::read`].
pub fn dmu_read(objset: &mut Objset, object: u64, offset: u64, length: usize) -> FsResult<Vec<u8>> {
    objset.read(object, offset, length)
}
/// C-style convenience wrapper for [`Objset::write`].
pub fn dmu_write(
    objset: &mut Objset,
    object: u64,
    offset: u64,
    data: &[u8],
    tx: &DmuTx,
) -> FsResult<()> {
    objset.write(object, offset, data, tx)
}
/// C-style convenience wrapper for [`Objset::object_info`].
pub fn dmu_object_info(objset: &Objset, object: u64) -> FsResult<ObjectInfo> {
    objset.object_info(object)
}
/// Thin legacy facade exposing the keyless dnode I/O helpers.
///
/// NOTE(review): `root_bp` is never read by the visible code — presumably
/// kept for callers elsewhere; confirm before removing.
pub struct ObjectSet {
    pub root_bp: Blkptr,
}
impl ObjectSet {
    /// Forwards to `Objset::read_dnode_data` (all-zero key).
    pub fn read_dnode_data(dnode: &DnodePhys, offset: u64, length: usize) -> FsResult<Vec<u8>> {
        Objset::read_dnode_data(dnode, offset, length)
    }
    /// Forwards to `Objset::write_dnode_data` (all-zero key).
    pub fn write_dnode_data(
        dnode: &mut DnodePhys,
        offset: u64,
        data: &[u8],
        txg: u64,
    ) -> FsResult<()> {
        Objset::write_dnode_data(dnode, offset, data, txg)
    }
    /// Forwards to `Objset::max_file_size`.
    pub fn max_file_size(indirection_level: u8) -> u64 {
        Objset::max_file_size(indirection_level)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // Boundary checks for the direct/indirect thresholds: 3 direct
    // pointers, then 3 * 32^level blocks per indirection level
    // (32 pointers per 4 KiB indirect block with 128-byte Blkptrs).
    #[test]
    fn test_required_indirection_level() {
        assert_eq!(Objset::required_indirection_level(0), 0);
        assert_eq!(Objset::required_indirection_level(1), 0);
        assert_eq!(Objset::required_indirection_level(2), 0);
        assert_eq!(Objset::required_indirection_level(3), 1);
        assert_eq!(Objset::required_indirection_level(32), 1);
        assert_eq!(Objset::required_indirection_level(95), 1);
        assert_eq!(Objset::required_indirection_level(96), 2);
        assert_eq!(Objset::required_indirection_level(1000), 2);
        assert_eq!(Objset::required_indirection_level(3071), 2);
        assert_eq!(Objset::required_indirection_level(3072), 3);
        assert_eq!(Objset::required_indirection_level(100000), 3);
    }
    // max_file_size must agree with the thresholds above.
    #[test]
    fn test_max_file_size() {
        assert_eq!(Objset::max_file_size(0), 3 * 4096);
        assert_eq!(Objset::max_file_size(1), 3 * 32 * 4096);
        assert_eq!(Objset::max_file_size(2), 3 * 32 * 32 * 4096);
        assert_eq!(Objset::max_file_size(3), 3 * 32 * 32 * 32 * 4096);
    }
    // A fresh transaction is open/unassigned and accumulates space on holds.
    #[test]
    fn test_transaction_lifecycle() {
        let objset = Objset::new(1);
        let mut tx = DmuTx::create(objset.os_id);
        assert!(!tx.is_assigned());
        assert_eq!(tx.txg(), 0);
        tx.hold_write(1, 0, 4096).unwrap();
        assert!(tx.tx_space_towrite > 0);
        assert!(!tx.is_assigned());
    }
    // Dnode::allocate produces a level-0 dnode with one block pointer.
    #[test]
    fn test_dnode_allocation() {
        let dnode = Dnode::allocate(
            1,
            DmuObjectType::PlainFileContents,
            4096,
            DmuObjectType::None,
            0,
        );
        assert_eq!(dnode.dn_object, 1);
        assert_eq!(dnode.object_type(), DmuObjectType::PlainFileContents);
        assert_eq!(dnode.dn_phys.indirection_levels, 0);
        assert_eq!(dnode.dn_phys.nblkptr, 1);
    }
    // First allocation in a fresh objset gets id 1 (id 0 is the meta-dnode).
    // The tx state is forced to Assigned directly (same-module access).
    #[test]
    fn test_objset_object_alloc() {
        let mut objset = Objset::new(1);
        let mut tx = DmuTx::create(objset.os_id);
        tx.hold_write(DMU_NEW_OBJECT, 0, 0).unwrap();
        tx.tx_state = TxState::Assigned;
        tx.tx_txg = 1;
        let obj_id = objset
            .object_alloc(
                DmuObjectType::PlainFileContents,
                4096,
                DmuObjectType::None,
                0,
                &tx,
            )
            .unwrap();
        assert_eq!(obj_id, 1); assert!(objset.os_dnodes.contains_key(&obj_id));
    }
    // object_info reflects the allocated type and zero indirection.
    #[test]
    fn test_object_info() {
        let mut objset = Objset::new(1);
        let mut tx = DmuTx::create(objset.os_id);
        tx.tx_state = TxState::Assigned;
        tx.tx_txg = 1;
        let obj_id = objset
            .object_alloc(
                DmuObjectType::PlainFileContents,
                4096,
                DmuObjectType::None,
                0,
                &tx,
            )
            .unwrap();
        let info = objset.object_info(obj_id).unwrap();
        assert_eq!(info.doi_type, DmuObjectType::PlainFileContents);
        assert_eq!(info.doi_indirection, 0);
    }
    // Full state machine walk: Open -> Quiescing -> Syncing -> Committed.
    #[test]
    fn test_txg_states() {
        let mut txg = Txg::new(1);
        assert!(txg.is_open());
        assert_eq!(txg.txg_state, TxgState::Open);
        txg.quiesce();
        assert_eq!(txg.txg_state, TxgState::Quiescing);
        assert!(!txg.is_open());
        txg.sync_start();
        assert_eq!(txg.txg_state, TxgState::Syncing);
        txg.commit();
        assert_eq!(txg.txg_state, TxgState::Committed);
    }
    // DmuBuf::new zero-fills and starts clean; mark_dirty records the txg.
    #[test]
    fn test_dmu_buf() {
        let mut buf = DmuBuf::new(1, 0, 4096);
        assert_eq!(buf.db_object, 1);
        assert_eq!(buf.db_offset, 0);
        assert_eq!(buf.db_size, 4096);
        assert!(!buf.db_dirty);
        buf.mark_dirty(1);
        assert!(buf.db_dirty);
        assert_eq!(buf.db_dirty_txg, 1);
    }
    // with_data adopts the vector and derives db_size from its length.
    #[test]
    fn test_dmu_buf_with_data() {
        let data = vec![1, 2, 3, 4, 5];
        let buf = DmuBuf::with_data(1, 0, data.clone());
        assert_eq!(buf.db_data, data);
        assert_eq!(buf.db_size, 5);
    }
    // rele() returns the count remaining AFTER the release.
    #[test]
    fn test_dnode_hold_rele() {
        let dnode = Dnode::new(1, DnodePhys::zero());
        assert_eq!(dnode.dn_holds.load(Ordering::Acquire), 0);
        dnode.hold();
        assert_eq!(dnode.dn_holds.load(Ordering::Acquire), 1);
        dnode.hold();
        assert_eq!(dnode.dn_holds.load(Ordering::Acquire), 2);
        let count = dnode.rele();
        assert_eq!(count, 1);
        assert_eq!(dnode.dn_holds.load(Ordering::Acquire), 1);
    }
    // Validation limits of the global block-size knob.
    // NOTE(review): this mutates process-global state; with the default
    // parallel test runner it could race any test that reads
    // get_block_size() — confirm tests are run single-threaded or isolate.
    #[test]
    fn test_block_size_config() {
        assert!(set_default_block_size(4096).is_ok());
        assert_eq!(get_block_size(), 4096);
        assert!(set_default_block_size(512).is_ok());
        assert_eq!(get_block_size(), 512);
        assert!(set_default_block_size(16 * 1024 * 1024).is_ok());
        assert_eq!(get_block_size(), 16 * 1024 * 1024);
        assert!(set_default_block_size(0).is_err());
        // not a power of 2, below minimum, above maximum:
        assert!(set_default_block_size(1000).is_err()); assert!(set_default_block_size(256).is_err()); assert!(set_default_block_size(32 * 1024 * 1024).is_err());
        let _ = set_default_block_size(DMU_DEFAULT_BLOCK_SIZE);
    }
}