use crate::stg::Storage;
use crate::{nd, util, Arc, Data};
use std::cmp::min;
use std::collections::BTreeSet;
/// Divides the underlying storage into numbered logical pages, each backed by a
/// fixed-size "starter page" plus zero or more "extension pages".
///
/// File layout (written/read in `new`): a 28-byte header at offset 0
/// (`ep_resvd` at 0, `lp_alloc` at 8, `lp_first` at 16, `sp_size` at 24,
/// `ep_size` at 26), followed by one starter page per logical page inside the
/// reserved region, with extension pages beyond it.
pub struct CompactFile {
/// Underlying storage backend.
pub stg: Box<dyn Storage>,
/// Size in bytes of a starter page (includes 2-byte length prefix and extension-page pointers).
pub(crate) sp_size: usize,
/// Size in bytes of an extension page (first 8 bytes hold the owning logical page number).
pub(crate) ep_size: usize,
/// Number of physical pages reserved at the start of the file for the header and starter pages.
ep_resvd: u64,
/// Total number of physical pages currently in the file.
ep_count: u64,
/// Physical extension pages freed but not yet reclaimed (reclaimed in `save`).
ep_free: BTreeSet<u64>,
/// Next never-used logical page number.
lp_alloc: u64,
/// Head of the on-file chain of freed logical pages; u64::MAX when the chain is empty.
lp_first: u64,
/// True when `lp_alloc`/`lp_first` differ from the values stored in the header.
lp_alloc_dirty: bool,
/// Logical pages freed since the last `save` (flushed into the on-file chain by `save`).
lp_free: BTreeSet<u64>,
/// True when the backing storage was empty at construction time.
is_new: bool,
}
impl CompactFile {
/// Header size in bytes: ep_resvd(8) + lp_alloc(8) + lp_first(8) + sp_size(2) + ep_size(2).
const HSIZE: u64 = 28;
/// Marker stored at offset 10 of a free starter page so the free chain can be sanity-checked.
const SPECIAL_VALUE: u64 = 0xf1e2d3c4b5a697;
/// Opens `stg` as a CompactFile. For empty storage a fresh header is written using
/// the supplied `sp_size`/`ep_size`; otherwise the header is read back and the
/// supplied sizes are overridden by the stored ones.
pub fn new(stg: Box<dyn Storage>, sp_size: usize, ep_size: usize) -> Self {
let fsize = stg.size();
let is_new = fsize == 0;
let mut x = Self {
sp_size,
ep_size,
stg,
// Initially reserve 10 physical pages for the header + starter pages.
ep_resvd: 10,
ep_count: 10,
ep_free: BTreeSet::new(),
lp_alloc: 0,
lp_first: u64::MAX,
lp_alloc_dirty: false,
lp_free: BTreeSet::new(),
is_new,
};
if is_new {
x.stg.write_u64(0, x.ep_resvd);
x.write_u16(24, x.sp_size as u16);
x.write_u16(26, x.ep_size as u16);
x.lp_alloc_dirty = true;
} else {
// Existing file: the header is authoritative.
x.ep_resvd = x.stg.read_u64(0);
x.lp_alloc = x.stg.read_u64(8);
x.lp_first = x.stg.read_u64(16);
x.sp_size = x.read_u16(24);
x.ep_size = x.read_u16(26);
}
// Physical page count = ceil(file size / ep_size), but never below the reserved region.
x.ep_count = (fsize + (x.ep_size as u64) - 1) / (x.ep_size as u64);
if x.ep_count < x.ep_resvd {
x.ep_count = x.ep_resvd;
}
if is_new {
x.save();
}
x
}
/// Returns the stored size in bytes of logical page `lpnum` (0 if the page is
/// outside the reserved starter-page region).
pub fn lp_size(&self, lpnum: u64) -> usize {
if self.lp_valid(lpnum) {
// First 2 bytes of the starter page hold the data length.
self.read_u16(Self::HSIZE + (self.sp_size as u64) * lpnum)
} else {
0
}
}
/// Writes `data` as the contents of logical page `lpnum`.
///
/// Starter page layout: [2-byte size][8-byte extension page numbers...][inline remainder].
/// Extension page layout: [8-byte owner lpnum][data].
pub fn set_page(&mut self, lpnum: u64, data: Data) {
debug_assert!(!self.lp_free.contains(&lpnum));
// Grow the reserved region if this page's starter slot doesn't exist yet.
self.extend_starter_pages(lpnum);
let size = data.len();
let ext = self.ext(size);
let foff = Self::HSIZE + (self.sp_size as u64) * lpnum;
let old_size = self.read_u16(foff);
let mut old_ext = self.ext(old_size);
// `info` holds the starter-page prefix: size field plus extension page numbers.
let mut info = vec![0_u8; 2 + old_ext * 8];
self.stg.read(foff, &mut info);
util::set(&mut info, 0, size as u64, 2);
if ext != old_ext {
// Shrinking: release surplus extension pages to ep_free.
while old_ext > ext {
old_ext -= 1;
let fp = util::getu64(&info, 2 + old_ext * 8);
info.resize(info.len() - 8, 0); self.ep_free.insert(fp);
}
// Growing: allocate new extension pages and record their numbers.
while old_ext < ext {
let np = self.ep_alloc();
info.resize(info.len() + 8, 0);
util::setu64(&mut info[2 + old_ext * 8..], np);
old_ext += 1;
}
}
let mut done = 0;
// Fill each extension page: 8-byte owner header followed by up to ep_size-8 data bytes.
for i in 0..ext {
let amount = min(size - done, self.ep_size - 8);
let page = util::getu64(&info, 2 + i * 8);
let foff = page * (self.ep_size as u64);
self.stg.write_u64(foff, lpnum);
self.stg.write_data(foff + 8, data.clone(), done, amount);
done += amount;
}
// Remainder goes inline in the starter page, after the prefix.
let amount = size - done;
if amount > 0 {
let off = 2 + ext * 8;
// Guaranteed by ext_pages: size <= (sp_size - 2) + ext * (ep_size - 16).
assert!(off + amount <= self.sp_size);
self.stg.write_data(foff + off as u64, data, done, amount);
}
debug_assert!(info.len() == 2 + ext * 8);
// Write the prefix last; it does not overlap the inline data written above.
self.stg.write_vec(foff, info);
}
/// Reads the contents of logical page `lpnum`; returns empty data for an
/// out-of-range page. Inverse of `set_page`.
pub fn get_page(&self, lpnum: u64) -> Data {
if !self.lp_valid(lpnum) {
return nd();
}
let mut starter = vec![0_u8; self.sp_size];
let foff = Self::HSIZE + (self.sp_size as u64) * lpnum;
self.stg.read(foff, &mut starter);
let size = util::get(&starter, 0, 2) as usize; let mut data = vec![0u8; size];
let ext = self.ext(size); let mut done = 0;
for i in 0..ext {
let amount = min(size - done, self.ep_size - 8);
let page = util::getu64(&starter, 2 + i * 8);
let roff = page * (self.ep_size as u64);
// Each extension page records its owner in its first 8 bytes.
debug_assert!(self.stg.read_u64(roff) == lpnum);
self.stg.read(roff + 8, &mut data[done..done + amount]);
done += amount;
}
// Remainder is stored inline in the starter page.
let amount = size - done;
if amount > 0 {
let off = 2 + ext * 8;
data[done..size].copy_from_slice(&starter[off..off + amount]);
}
Arc::new(data)
}
/// Follows the on-file free chain: a free starter page stores size 0, the next
/// free page number at offset 2, and SPECIAL_VALUE at offset 10 (see `save`).
fn next_free(&self, p: u64) -> u64 {
let lpoff = Self::HSIZE + p * self.sp_size as u64;
debug_assert!(self.read_u16(lpoff) == 0);
debug_assert!(self.stg.read_u64(lpoff + 10) == Self::SPECIAL_VALUE);
self.stg.read_u64(lpoff + 2)
}
/// Allocates a logical page number: first from pages freed since the last save,
/// then from the on-file free chain, finally by extending `lp_alloc`.
pub fn alloc_page(&mut self) -> u64 {
if let Some(p) = self.lp_free.pop_first() {
p
} else {
self.lp_alloc_dirty = true;
let mut p = self.lp_first;
if p != u64::MAX {
// Pop the head of the on-file free chain.
self.lp_first = self.next_free(p);
} else {
// Chain empty: hand out a never-used page number.
p = self.lp_alloc;
self.lp_alloc += 1;
}
p
}
}
/// Marks logical page `pnum` free; the on-file free chain is updated lazily in `save`.
pub fn free_page(&mut self, pnum: u64) {
self.lp_free.insert(pnum);
}
/// True if the backing storage was empty when this CompactFile was constructed.
pub fn is_new(&self) -> bool {
self.is_new
}
/// Discards logical-page allocations made since the last save, restoring
/// `lp_alloc`/`lp_first` from the header.
/// NOTE(review): `ep_free` is not reset here — presumably any extension pages
/// freed by uncommitted `set_page` calls are handled by the Storage layer's
/// commit/rollback semantics; confirm against the `Storage` implementation.
pub fn rollback(&mut self) {
self.lp_free.clear();
if self.lp_alloc_dirty {
self.lp_alloc_dirty = false;
self.lp_alloc = self.stg.read_u64(8);
self.lp_first = self.stg.read_u64(16);
}
}
/// Flushes freed logical pages into the on-file free chain, reclaims freed
/// extension pages by shrinking the file, updates the header, and commits.
pub fn save(&mut self) {
let flist = std::mem::take(&mut self.lp_free);
// Iterate in reverse (descending page number) so the lowest freed page ends
// up at the head of the chain.
for p in flist.iter().rev() {
let p = *p;
// Truncate the page to zero bytes, releasing its extension pages.
self.set_page(p, nd());
let lpoff = Self::HSIZE + p * self.sp_size as u64;
// Thread the page onto the free chain (link at +2, marker at +10).
self.stg.write_u64(lpoff + 10, Self::SPECIAL_VALUE); self.stg.write_u64(lpoff + 2, self.lp_first);
self.lp_first = p;
self.lp_alloc_dirty = true;
}
// Shrink the file: repeatedly take the last physical page; if it is free just
// drop it, otherwise move it into a free slot.
while !self.ep_free.is_empty() {
self.ep_count -= 1;
let from = self.ep_count;
if !self.ep_free.remove(&from) {
let to = self.ep_alloc();
self.relocate(from, to);
}
}
if self.lp_alloc_dirty {
self.lp_alloc_dirty = false;
self.stg.write_u64(8, self.lp_alloc);
self.stg.write_u64(16, self.lp_first);
}
self.stg.commit(self.ep_count * self.ep_size as u64);
}
/// Reads a little-endian u16 from `offset`, widened to usize.
fn read_u16(&self, offset: u64) -> usize {
let mut bytes = [0; 2];
self.stg.read(offset, &mut bytes);
u16::from_le_bytes(bytes) as usize
}
/// Writes `x` at `offset` as little-endian.
fn write_u16(&mut self, offset: u64, x: u16) {
self.stg.write(offset, &x.to_le_bytes());
}
/// Moves physical extension page `from` to `to` and patches the matching
/// pointer in the owning logical page's starter page.
fn relocate(&mut self, from: u64, to: u64) {
if from == to {
return;
}
let mut buffer = vec![0; self.ep_size];
self.stg.read(from * self.ep_size as u64, &mut buffer);
self.stg.write(to * self.ep_size as u64, &buffer);
// The page's first 8 bytes identify the logical page that owns it.
let lpnum = util::getu64(&buffer, 0);
assert!(lpnum < self.lp_alloc);
let mut off = Self::HSIZE + lpnum * self.sp_size as u64;
let size = self.read_u16(off);
let mut ext = self.ext(size);
off += 2;
// Scan the owner's extension-page pointers for `from` and rewrite it.
loop {
debug_assert!(ext != 0);
let x = self.stg.read_u64(off);
if x == from {
self.stg.write_u64(off, to);
break;
}
off += 8;
ext -= 1;
}
}
/// Zero-fills physical page `epnum`.
fn ep_clear(&mut self, epnum: u64) {
let buf = vec![0; self.ep_size];
self.stg.write(epnum * self.ep_size as u64, &buf);
}
/// True if the starter page for `lpnum` lies wholly inside the reserved region.
fn lp_valid(&self, lpnum: u64) -> bool {
Self::HSIZE + (lpnum + 1) * (self.sp_size as u64) <= self.ep_resvd * (self.ep_size as u64)
}
/// Grows the reserved region until the starter page for `lpnum` fits,
/// evicting any in-use extension pages to the end of the file.
fn extend_starter_pages(&mut self, lpnum: u64) {
let mut save = false;
while !self.lp_valid(lpnum) {
// If the page being absorbed was already free, no relocation is needed.
if !self.ep_free.remove(&self.ep_resvd)
{
self.relocate(self.ep_resvd, self.ep_count);
self.ep_count += 1;
}
self.ep_clear(self.ep_resvd);
self.ep_resvd += 1;
save = true;
}
if save {
// Persist the new reserved-page count in the header.
self.stg.write_u64(0, self.ep_resvd);
}
}
/// Allocates a physical extension page, reusing the lowest free one if any.
fn ep_alloc(&mut self) -> u64 {
if let Some(pp) = self.ep_free.iter().next() {
let p = *pp;
self.ep_free.remove(&p);
p
} else {
let p = self.ep_count;
self.ep_count += 1;
p
}
}
/// Number of extension pages needed to store `size` bytes with this file's geometry.
fn ext(&self, size: usize) -> usize {
Self::ext_pages(self.sp_size, self.ep_size, size)
}
/// Number of extension pages needed for `size` bytes: each extension page holds
/// ep_size - 8 data bytes but also costs 8 bytes of pointer space in the starter
/// page, hence the net gain of ep_size - 16 bytes per extension page.
fn ext_pages(sp_size: usize, ep_size: usize, size: usize) -> usize {
let mut n = 0;
if size > (sp_size - 2) {
n = ((size - (sp_size - 2)) + (ep_size - 16 - 1)) / (ep_size - 16);
}
// Capacity check: prefix + headers + data must fit in starter + n extension pages.
debug_assert!(2 + 16 * n + size <= sp_size + n * ep_size);
// The pointer list itself must fit in the starter page.
assert!(2 + n * 8 <= sp_size);
n
}
/// True if shrinking a page of `size` bytes by `saving` bytes would reduce the
/// number of extension pages it needs.
pub fn compress(sp_size: usize, ep_size: usize, size: usize, saving: usize) -> bool {
Self::ext_pages(sp_size, ep_size, size - saving) < Self::ext_pages(sp_size, ep_size, size)
}
/// Returns the set of logical pages on the free chain plus `lp_alloc`;
/// panics (via assert) if the chain contains a cycle.
#[cfg(feature = "verify")]
pub fn get_info(&self) -> (crate::HashSet<u64>, u64) {
let mut free = crate::HashSet::default();
let mut p = self.lp_first;
while p != u64::MAX {
assert!(free.insert(p));
p = self.next_free(p);
}
(free, self.lp_alloc)
}
/// Drains the on-file free chain into `lp_free` and returns the number of
/// logical pages actually in use (allocated minus free).
#[cfg(feature = "renumber")]
pub fn load_free_pages(&mut self) -> u64 {
let mut p = self.lp_first;
while p != u64::MAX {
self.free_page(p);
p = self.next_free(p);
}
// Loop exits with p == u64::MAX, emptying the chain.
self.lp_first = p;
self.lp_alloc - self.lp_free.len() as u64
}
/// Moves logical page `lpnum` to a freshly allocated page number, rewriting
/// the owner headers of its extension pages, and returns the new number.
#[cfg(feature = "renumber")]
pub fn renumber(&mut self, lpnum: u64) -> u64 {
let lpnum2 = self.alloc_page();
if self.lp_valid(lpnum) {
let mut starter = vec![0_u8; self.sp_size];
let foff = Self::HSIZE + (self.sp_size as u64) * lpnum;
self.stg.read(foff, &mut starter);
let size = util::get(&starter, 0, 2) as usize; let ext = self.ext(size); for i in 0..ext {
let page = util::getu64(&starter, 2 + i * 8);
let woff = page * (self.ep_size as u64);
debug_assert!(self.stg.read_u64(woff) == lpnum);
// Re-point each extension page at its new owner.
self.stg.write_u64(woff, lpnum2);
}
// Copy the entire starter page to the new slot.
let foff2 = Self::HSIZE + (self.sp_size as u64) * lpnum2;
self.stg.write_vec(foff2, starter);
}
self.free_page(lpnum);
lpnum2
}
/// Shrinks the reserved region to just cover `target` starter pages, pulling
/// pages back from the end of the file to fill the vacated slots.
#[cfg(feature = "renumber")]
fn reduce_starter_pages(&mut self, target: u64) {
let resvd = Self::HSIZE + target * self.sp_size as u64;
// Round up to whole physical pages.
let resvd = (resvd + self.ep_size as u64 - 1) / self.ep_size as u64;
while self.ep_resvd > resvd {
self.ep_count -= 1;
let from = self.ep_count;
if !self.ep_free.remove(&from) {
self.ep_resvd -= 1;
self.relocate(from, self.ep_resvd);
}
}
self.stg.write_u64(0, self.ep_resvd);
}
/// After renumbering, caps the logical page allocator at `target` and zeroes
/// the now-unused tail of the starter-page region.
#[cfg(feature = "renumber")]
pub fn set_lpalloc(&mut self, target: u64) {
// Preconditions: free chain drained and exactly the pages >= target are free.
assert!(self.lp_first == u64::MAX);
assert!(self.ep_free.len() == 0);
assert!(self.lp_free.len() as u64 == self.lp_alloc - target);
self.extend_starter_pages(target);
self.reduce_starter_pages(target);
self.lp_alloc = target;
self.lp_free.clear();
self.lp_alloc_dirty = true;
self.clear_lp();
}
/// Zeroes the starter-page region from `lp_alloc` to the end of the reserved area.
#[cfg(feature = "renumber")]
fn clear_lp(&mut self) {
let start = Self::HSIZE + (self.sp_size as u64) * self.lp_alloc;
let end = self.ep_resvd * self.ep_size as u64;
let buf = vec![0; (end - start) as usize];
self.stg.write(start, &buf);
}
} #[test]
/// Randomized round-trip test: two CompactFiles with different page geometry
/// must always report identical page contents for identical writes.
pub fn test() {
    use crate::stg::MemFile;
    use rand::Rng;
    let mut rng = rand::thread_rng();
    let mut small = CompactFile::new(MemFile::new(), 200, 512);
    let mut large = CompactFile::new(MemFile::new(), 136, 1024);
    // Reserve logical pages 0..100 in both files.
    for _ in 0..100 {
        small.alloc_page();
        large.alloc_page();
    }
    for _ in 0..100000 {
        // Write a random-length run of a random byte to a random page of each file.
        let len: usize = rng.gen::<usize>() % 5000;
        let page: u64 = rng.gen::<u64>() % 100;
        let byte: u8 = rng.gen::<u8>();
        let payload = Arc::new(vec![byte; len]);
        small.set_page(page, payload.clone());
        large.set_page(page, payload.clone());
        // Read back a (possibly different) random page; both geometries must agree.
        let probe: u64 = rng.gen::<u64>() % 100;
        assert!(small.get_page(probe) == large.get_page(probe));
        small.save();
        large.save();
    }
}