use crate::{
balance,
buffer::{
ExtentBuffer, HEADER_SIZE, ITEM_SIZE, KEY_PTR_SIZE,
debug_assert_leaf_valid, debug_assert_node_valid, key_cmp,
},
filesystem::Filesystem,
path::BtrfsPath,
transaction::Transaction,
};
use btrfs_disk::tree::DiskKey;
use std::io::{self, Read, Seek, Write};
#[allow(clippy::too_many_lines)]
pub fn split_leaf<R: Read + Write + Seek>(
trans: &mut Transaction<R>,
fs_info: &mut Filesystem<R>,
path: &mut BtrfsPath,
tree_id: u64,
key: &DiskKey,
data_size: u32,
) -> io::Result<()> {
if balance::push_leaf_right(trans, fs_info, path, tree_id)? > 0 {
let leaf = path.nodes[0].as_ref().unwrap();
if leaf.leaf_free_space() >= data_size {
return Ok(());
}
}
if balance::push_leaf_left(trans, fs_info, path, tree_id)? > 0 {
let leaf = path.nodes[0].as_ref().unwrap();
if leaf.leaf_free_space() >= data_size {
return Ok(());
}
}
let leaf = path.nodes[0]
.as_ref()
.ok_or_else(|| io::Error::other("split_leaf: no leaf in path"))?;
let nritems = leaf.nritems() as usize;
let nodesize = leaf.nodesize();
let total_bytes: u32 = (0..nritems)
.map(|i| ITEM_SIZE as u32 + leaf.item_size(i))
.sum();
let half = total_bytes / 2;
let mut running = 0u32;
let mut split = nritems / 2; for i in 0..nritems {
running += ITEM_SIZE as u32 + leaf.item_size(i);
if running >= half {
split = (i + 1).clamp(1, nritems - 1);
break;
}
}
let new_logical = trans.alloc_tree_block(fs_info, tree_id, 0)?;
let mut new_leaf = ExtentBuffer::new_zeroed(nodesize, new_logical);
new_leaf.set_bytenr(new_logical);
new_leaf.set_level(0);
new_leaf.set_generation(fs_info.generation);
new_leaf.set_owner(leaf.owner());
new_leaf.set_fsid(&leaf.fsid());
new_leaf.set_chunk_tree_uuid(&leaf.chunk_tree_uuid());
new_leaf.set_flags(leaf.flags());
new_leaf.set_nritems(0);
let leaf = path.nodes[0].as_ref().unwrap();
let move_count = nritems - split;
let mut new_data_end = nodesize;
for i in 0..move_count {
let src_slot = split + i;
let src_key = leaf.item_key(src_slot);
let src_size = leaf.item_size(src_slot);
let src_data = leaf.item_data(src_slot).to_vec();
new_data_end -= src_size;
let new_offset = new_data_end - HEADER_SIZE as u32;
new_leaf.set_item_key(i, &src_key);
new_leaf.set_item_offset(i, new_offset);
new_leaf.set_item_size(i, src_size);
new_leaf.as_bytes_mut()
[new_data_end as usize..new_data_end as usize + src_size as usize]
.copy_from_slice(&src_data);
}
new_leaf.set_nritems(move_count as u32);
let old_leaf = path.nodes[0].as_mut().unwrap();
debug_assert_eq!(
old_leaf.generation(),
fs_info.generation,
"split_leaf: old leaf at {} has stale generation {} (expected {})",
old_leaf.logical(),
old_leaf.generation(),
fs_info.generation
);
old_leaf.set_nritems(split as u32);
if split > 0 {
let mut data_end = nodesize;
for i in 0..split {
let size = old_leaf.item_size(i);
let old_data = old_leaf.item_data(i).to_vec();
data_end -= size;
let new_offset = data_end - HEADER_SIZE as u32;
old_leaf.set_item_offset(i, new_offset);
let abs_off = data_end as usize;
old_leaf.as_bytes_mut()[abs_off..abs_off + size as usize]
.copy_from_slice(&old_data);
}
}
debug_assert!(
old_leaf.nritems() > 0,
"split_leaf: old leaf at {} is empty after split",
old_leaf.logical(),
);
debug_assert!(
new_leaf.nritems() > 0,
"split_leaf: new leaf at {} is empty after split",
new_leaf.logical(),
);
debug_assert_leaf_valid(old_leaf);
debug_assert_leaf_valid(&new_leaf);
debug_assert!(
key_cmp(
&old_leaf.item_key(old_leaf.nritems() as usize - 1),
&new_leaf.item_key(0),
) == std::cmp::Ordering::Less,
"split_leaf: old leaf's last key >= new leaf's first key",
);
fs_info.mark_dirty(old_leaf);
fs_info.mark_dirty(&new_leaf);
let new_leaf_first_key = new_leaf.item_key(0);
insert_ptr_in_parent(
trans,
fs_info,
path,
tree_id,
&new_leaf_first_key,
new_logical,
0, )?;
if key_cmp(key, &new_leaf_first_key) != std::cmp::Ordering::Less {
let slot_in_new = crate::search::leaf_bin_search(&new_leaf, key).slot;
path.nodes[0] = Some(new_leaf);
path.slots[0] = slot_in_new;
}
Ok(())
}
pub fn split_node<R: Read + Write + Seek>(
trans: &mut Transaction<R>,
fs_info: &mut Filesystem<R>,
path: &mut BtrfsPath,
tree_id: u64,
level: u8,
) -> io::Result<()> {
let node = path.nodes[level as usize]
.as_ref()
.ok_or_else(|| io::Error::other("split_node: no node at level"))?;
let nritems = node.nritems() as usize;
let nodesize = node.nodesize();
let split = nritems / 2;
let new_logical = trans.alloc_tree_block(fs_info, tree_id, level)?;
let mut new_node = ExtentBuffer::new_zeroed(nodesize, new_logical);
new_node.set_bytenr(new_logical);
new_node.set_level(level);
new_node.set_generation(fs_info.generation);
new_node.set_owner(node.owner());
new_node.set_fsid(&node.fsid());
new_node.set_chunk_tree_uuid(&node.chunk_tree_uuid());
new_node.set_flags(node.flags());
let node = path.nodes[level as usize].as_ref().unwrap();
let move_count = nritems - split;
for i in 0..move_count {
let src_slot = split + i;
let key = node.key_ptr_key(src_slot);
let blockptr = node.key_ptr_blockptr(src_slot);
let kp_gen = node.key_ptr_generation(src_slot);
new_node.set_key_ptr(i, &key, blockptr, kp_gen);
}
new_node.set_nritems(move_count as u32);
let old_node = path.nodes[level as usize].as_mut().unwrap();
old_node.set_nritems(split as u32);
debug_assert!(
old_node.nritems() > 0,
"split_node: old node at {} is empty after split",
old_node.logical(),
);
debug_assert!(
new_node.nritems() > 0,
"split_node: new node at {} is empty after split",
new_node.logical(),
);
debug_assert_node_valid(old_node);
debug_assert_node_valid(&new_node);
fs_info.mark_dirty(old_node);
fs_info.mark_dirty(&new_node);
let new_node_first_key = new_node.key_ptr_key(0);
insert_ptr_in_parent(
trans,
fs_info,
path,
tree_id,
&new_node_first_key,
new_logical,
level, )?;
Ok(())
}
fn insert_ptr_in_parent<R: Read + Write + Seek>(
trans: &mut Transaction<R>,
fs_info: &mut Filesystem<R>,
path: &mut BtrfsPath,
tree_id: u64,
key: &DiskKey,
child_logical: u64,
child_level: u8,
) -> io::Result<()> {
let parent_level = find_parent_level_above(path, child_level as usize);
if parent_level.is_none() {
return create_new_root(
trans,
fs_info,
path,
tree_id,
key,
child_logical,
);
}
let parent_level = parent_level.unwrap();
{
let parent = path.nodes[parent_level]
.as_ref()
.ok_or_else(|| io::Error::other("insert_ptr: parent missing"))?;
let parent_nritems = parent.nritems() as usize;
let max_ptrs = parent.max_key_ptrs() as usize;
if parent_nritems >= max_ptrs {
split_node(trans, fs_info, path, tree_id, parent_level as u8)?;
let gp_level = find_parent_level_above(path, parent_level);
if let Some(gp) = gp_level {
let gp_node = path.nodes[gp].as_ref().unwrap();
let gp_slot = crate::search::node_bin_search(gp_node, key);
let target_bytenr = gp_node.key_ptr_blockptr(gp_slot);
let target_node = fs_info.read_block(target_bytenr)?;
path.nodes[parent_level] = Some(target_node);
path.slots[parent_level] = crate::search::node_bin_search(
path.nodes[parent_level].as_ref().unwrap(),
key,
);
path.slots[gp] = gp_slot;
}
return insert_ptr_in_parent(
trans,
fs_info,
path,
tree_id,
key,
child_logical,
child_level,
);
}
}
let parent = path.nodes[parent_level].as_mut().unwrap();
let parent_nritems = parent.nritems() as usize;
let slot = path.slots[parent_level] + 1;
if slot < parent_nritems {
let src = HEADER_SIZE + slot * KEY_PTR_SIZE;
let len = (parent_nritems - slot) * KEY_PTR_SIZE;
parent.copy_within(src..src + len, src + KEY_PTR_SIZE);
}
parent.set_key_ptr(slot, key, child_logical, fs_info.generation);
parent.set_nritems(parent_nritems as u32 + 1);
debug_assert_node_valid(parent);
fs_info.mark_dirty(parent);
Ok(())
}
fn find_parent_level_above(
path: &BtrfsPath,
min_level: usize,
) -> Option<usize> {
(min_level + 1..path.nodes.len()).find(|&level| path.nodes[level].is_some())
}
fn create_new_root<R: Read + Write + Seek>(
trans: &mut Transaction<R>,
fs_info: &mut Filesystem<R>,
path: &mut BtrfsPath,
tree_id: u64,
right_key: &DiskKey,
right_logical: u64,
) -> io::Result<()> {
let old_root_level = {
let mut lvl = 0;
for (i, node) in path.nodes.iter().enumerate() {
if node.is_some() {
lvl = i;
}
}
lvl
};
let old_root = path.nodes[old_root_level].as_ref().unwrap();
let old_root_logical = old_root.logical();
let old_root_key = if old_root.is_leaf() {
old_root.item_key(0)
} else {
old_root.key_ptr_key(0)
};
let new_level = old_root.level() + 1;
let new_logical = trans.alloc_tree_block(fs_info, tree_id, new_level)?;
let mut new_root =
ExtentBuffer::new_zeroed(old_root.nodesize(), new_logical);
new_root.set_bytenr(new_logical);
new_root.set_level(new_level);
new_root.set_generation(fs_info.generation);
new_root.set_owner(old_root.owner());
new_root.set_fsid(&old_root.fsid());
new_root.set_chunk_tree_uuid(&old_root.chunk_tree_uuid());
new_root.set_flags(old_root.flags());
new_root.set_nritems(2);
debug_assert_eq!(
old_root.generation(),
fs_info.generation,
"create_new_root: old root at {} has generation {}, expected {}",
old_root_logical,
old_root.generation(),
fs_info.generation
);
new_root.set_key_ptr(
0,
&old_root_key,
old_root_logical,
fs_info.generation,
);
new_root.set_key_ptr(1, right_key, right_logical, fs_info.generation);
debug_assert_node_valid(&new_root);
fs_info.mark_dirty(&new_root);
fs_info.set_root_bytenr(tree_id, new_logical);
path.nodes[new_level as usize] = Some(new_root);
path.slots[new_level as usize] = 0;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{buffer::ExtentBuffer, items};
use btrfs_disk::tree::KeyType;
fn make_key(oid: u64) -> DiskKey {
DiskKey {
objectid: oid,
key_type: KeyType::InodeItem,
offset: 0,
}
}
fn filled_leaf(
nodesize: u32,
item_count: usize,
data_size: usize,
) -> ExtentBuffer {
let mut eb = ExtentBuffer::new_zeroed(nodesize, 65536);
eb.set_bytenr(65536);
eb.set_level(0);
eb.set_nritems(0);
eb.set_generation(1);
eb.set_owner(5);
let data = vec![0xAA; data_size];
for i in 0..item_count {
items::insert_item(&mut eb, i, &make_key(i as u64 + 1), &data)
.unwrap();
}
eb
}
#[test]
fn data_aware_split_point_uniform_items() {
let eb = filled_leaf(16384, 200, 32);
let nritems = eb.nritems() as usize;
let total_bytes: u32 = (0..nritems)
.map(|i| ITEM_SIZE as u32 + eb.item_size(i))
.sum();
let half = total_bytes / 2;
let mut running = 0u32;
let mut split = nritems / 2;
for i in 0..nritems {
running += ITEM_SIZE as u32 + eb.item_size(i);
if running >= half {
split = (i + 1).clamp(1, nritems - 1);
break;
}
}
assert!(
(split as i64 - nritems as i64 / 2).unsigned_abs() <= 1,
"split={split} but nritems/2={}",
nritems / 2
);
}
#[test]
fn data_aware_split_point_variable_items() {
let mut eb = ExtentBuffer::new_zeroed(4096, 65536);
eb.set_bytenr(65536);
eb.set_level(0);
eb.set_nritems(0);
eb.set_generation(1);
eb.set_owner(5);
items::insert_item(&mut eb, 0, &make_key(1), &[0x11; 1000]).unwrap();
items::insert_item(&mut eb, 1, &make_key(2), &[0x22; 50]).unwrap();
items::insert_item(&mut eb, 2, &make_key(3), &[0x33; 50]).unwrap();
items::insert_item(&mut eb, 3, &make_key(4), &[0x44; 50]).unwrap();
items::insert_item(&mut eb, 4, &make_key(5), &[0x55; 50]).unwrap();
let nritems = eb.nritems() as usize;
let total_bytes: u32 = (0..nritems)
.map(|i| ITEM_SIZE as u32 + eb.item_size(i))
.sum();
let half = total_bytes / 2;
let mut running = 0u32;
let mut split = nritems / 2;
for i in 0..nritems {
running += ITEM_SIZE as u32 + eb.item_size(i);
if running >= half {
split = (i + 1).clamp(1, nritems - 1);
break;
}
}
assert_eq!(split, 1, "split should be after the large item");
}
#[test]
fn split_point_clamps_minimum() {
let mut eb = ExtentBuffer::new_zeroed(4096, 65536);
eb.set_bytenr(65536);
eb.set_level(0);
eb.set_nritems(0);
eb.set_generation(1);
eb.set_owner(5);
items::insert_item(&mut eb, 0, &make_key(1), &[0x11; 2000]).unwrap();
items::insert_item(&mut eb, 1, &make_key(2), &[0x22; 50]).unwrap();
let nritems = eb.nritems() as usize;
let total_bytes: u32 = (0..nritems)
.map(|i| ITEM_SIZE as u32 + eb.item_size(i))
.sum();
let half = total_bytes / 2;
let mut running = 0u32;
let mut split = nritems / 2;
for i in 0..nritems {
running += ITEM_SIZE as u32 + eb.item_size(i);
if running >= half {
split = (i + 1).clamp(1, nritems - 1);
break;
}
}
assert_eq!(split, 1, "split should be 1 (at least one item per side)");
}
#[test]
fn leaf_data_compaction_after_truncate() {
let mut eb = filled_leaf(4096, 20, 32);
let nodesize = eb.nodesize();
let split = 10;
let end_before = eb.item_offset(0) + eb.item_size(0);
assert_eq!(end_before, nodesize - HEADER_SIZE as u32);
eb.set_nritems(split);
let end_after_trunc = eb.item_offset(0) + eb.item_size(0);
assert_eq!(end_after_trunc, nodesize - HEADER_SIZE as u32);
let mut data_end = nodesize;
for i in 0..split as usize {
let size = eb.item_size(i);
let old_data = eb.item_data(i).to_vec();
data_end -= size;
let new_offset = data_end - HEADER_SIZE as u32;
eb.set_item_offset(i, new_offset);
let abs_off = data_end as usize;
eb.as_bytes_mut()[abs_off..abs_off + size as usize]
.copy_from_slice(&old_data);
}
let end_compacted = eb.item_offset(0) + eb.item_size(0);
assert_eq!(end_compacted, nodesize - HEADER_SIZE as u32);
for i in 0..split as usize {
assert_eq!(eb.item_data(i), &[0xAA; 32]);
}
}
#[test]
fn find_parent_level_none() {
let path = BtrfsPath::new();
assert!(find_parent_level_above(&path, 0).is_none());
}
#[test]
fn find_parent_level_at_1() {
let mut path = BtrfsPath::new();
path.nodes[0] = Some(ExtentBuffer::new_zeroed(4096, 0));
path.nodes[1] = Some(ExtentBuffer::new_zeroed(4096, 65536));
assert_eq!(find_parent_level_above(&path, 0), Some(1));
}
#[test]
fn find_parent_level_at_2() {
let mut path = BtrfsPath::new();
path.nodes[0] = Some(ExtentBuffer::new_zeroed(4096, 0));
path.nodes[2] = Some(ExtentBuffer::new_zeroed(4096, 131072));
assert_eq!(find_parent_level_above(&path, 0), Some(2));
}
}