mod generator;
mod id;
mod id_alloc;
use std::hash::{Hash, Hasher};
use rustc_hash::FxHasher;
pub use self::{generator::PackGenerator, id::PackId, id_alloc::PackIdAlloc};
use super::{
ScopeFileSystem,
index::{IndexGenerator, PackIndex},
};
use crate::{Error, Result};
/// An in-memory pack: an ordered list of `(key, value)` byte pairs.
///
/// Equality (`PartialEq`/`Eq`) is derived, so two packs are equal only when
/// they hold the same entries in the same order.
#[derive(Debug, Default, PartialEq, Eq, Clone)]
pub struct Pack {
// Entries as `(key bytes, value bytes)` tuples, kept in insertion order.
data: Vec<(Vec<u8>, Vec<u8>)>,
}
impl Pack {
    /// Creates a pack from `(key, value)` byte pairs, kept in the given order.
    pub fn new(data: Vec<(Vec<u8>, Vec<u8>)>) -> Self {
        Self { data }
    }

    /// Reads the pack identified by `id` from `fs`.
    ///
    /// The file is read as a sequence of entries. Each entry is a header line
    /// of the form `"<key_len> <value_len>"` followed by `key_len` bytes of key
    /// and `value_len` bytes of value. An empty header line ends the pack.
    ///
    /// Returns the loaded pack together with a content hash: an `FxHasher`
    /// fed with every key and value (via their `Hash` impls) in file order.
    ///
    /// # Errors
    ///
    /// Returns an error when:
    /// - the pack file cannot be opened for reading,
    /// - reading a header line, a key or a value fails,
    /// - a header does not consist of exactly two space-separated fields
    ///   (`Error::InvalidFormat`),
    /// - a length field is not a valid `usize` (`Error::InvalidFormat`).
    pub async fn load(fs: &ScopeFileSystem, id: PackId) -> Result<(Self, u64)> {
        let pack_name = id.pack_name();
        let mut reader = fs.stream_read(&pack_name).await?;
        let mut content_hasher = FxHasher::default();
        let mut data = Vec::new();

        // Parses one length field of an entry header; `kind` names the field
        // ("key" or "value") in the error message.
        let parse_len = |raw: &str, kind: &str| {
            raw.parse::<usize>().map_err(|e| {
                Error::InvalidFormat(format!(
                    "Failed to parse {kind} length in '{pack_name}': invalid value '{raw}' ({e})"
                ))
            })
        };

        loop {
            // A failed read must surface as an error. Treating it as the end
            // of the pack would silently return truncated data and a hash
            // computed over only part of the content.
            let header = reader.read_line().await?;
            if header.is_empty() {
                break;
            }

            // Exactly two fields are accepted; fewer or more is malformed.
            let mut fields = header.split(' ');
            let (raw_key_len, raw_value_len) =
                match (fields.next(), fields.next(), fields.next()) {
                    (Some(key_len), Some(value_len), None) => (key_len, value_len),
                    _ => {
                        return Err(Error::InvalidFormat(format!(
                            "Invalid pack item header in '{pack_name}': expected 'key_len value_len', got '{header}'"
                        )));
                    }
                };

            // The value length is only parsed after the key has been read,
            // so the key bytes are consumed before a bad value length errors.
            let key_len = parse_len(raw_key_len, "key")?;
            let key = reader.read(key_len).await?;
            key.hash(&mut content_hasher);

            let value_len = parse_len(raw_value_len, "value")?;
            let value = reader.read(value_len).await?;
            value.hash(&mut content_hasher);

            data.push((key, value));
        }

        Ok((Self { data }, content_hasher.finish()))
    }

    /// Writes this pack to `fs` under the name derived from `id`.
    ///
    /// Every entry is written as a header line `"<key_len> <value_len>"`
    /// followed by the raw key bytes and then the raw value bytes. Each key
    /// and value is also fed to an `IndexGenerator`, whose finished
    /// `PackIndex` is returned once the writer has been flushed.
    ///
    /// # Errors
    ///
    /// Returns an error when the write stream cannot be opened or when any
    /// write or the final flush fails.
    pub async fn save(&self, fs: &ScopeFileSystem, id: PackId) -> Result<PackIndex> {
        let mut writer = fs.stream_write(id.pack_name()).await?;
        let mut index_gen = IndexGenerator::default();

        for (key, value) in &self.data {
            let header = format!("{} {}", key.len(), value.len());
            writer.write_line(&header).await?;

            writer.write(key).await?;
            index_gen.add_key(key);

            writer.write(value).await?;
            index_gen.add_value(value);
        }

        writer.flush().await?;
        Ok(index_gen.finish())
    }

    /// Consumes the pack and returns its `(key, value)` entries.
    pub fn data(self) -> Vec<(Vec<u8>, Vec<u8>)> {
        self.data
    }

    /// Removes every entry whose key equals `key`.
    ///
    /// Returns `true` if at least one entry was removed, `false` otherwise.
    pub fn remove(&mut self, key: &[u8]) -> bool {
        let original_len = self.data.len();
        self.data.retain(|(k, _)| k.as_slice() != key);
        self.data.len() < original_len
    }
}
#[cfg(test)]
mod test {
    use super::{Pack, PackId, Result, ScopeFileSystem};

    /// Saves a pack to an in-memory file system, loads it back, and checks
    /// that the pack, its content hash and the generated index all agree.
    #[tokio::test]
    #[cfg_attr(miri, ignore)]
    async fn test_pack() -> Result<()> {
        let id = PackId::new(10);
        let fs = ScopeFileSystem::new_memory_fs("/bucket1".into());
        fs.ensure_exist().await?;

        // Nothing has been saved yet, so loading must fail.
        let missing = Pack::load(&fs, id).await;
        assert!(missing.is_err());

        let entries: Vec<(Vec<u8>, Vec<u8>)> =
            [("key1", "value1"), ("key2", "value2"), ("key3", "value3")]
                .iter()
                .map(|&(k, v)| (k.as_bytes().to_vec(), v.as_bytes().to_vec()))
                .collect();
        let mut pack = Pack::new(entries);

        // Removing an absent key reports `false`; a present key reports `true`.
        let removed_absent = pack.remove(b"key4");
        assert!(!removed_absent);
        let removed_present = pack.remove(b"key2");
        assert!(removed_present);

        // Round trip: the reloaded pack and its hash must match what was saved.
        let index = pack.save(&fs, id).await?;
        let (reloaded, content_hash) = Pack::load(&fs, id).await?;
        assert!(index.check_content_hash(content_hash));
        assert_eq!(pack, reloaded);

        // The index only knows the keys that were still present when saving.
        for (key, expected) in [("key1", true), ("key2", false), ("key3", true)] {
            assert_eq!(index.contains_key(key.as_bytes()), expected);
        }
        Ok(())
    }
}