use std::collections::HashMap;
use std::fs::File;
use std::io::{self, Read, Write, Seek, SeekFrom};
use std::path::Path;
use chrono::{DateTime, Utc};
use serde::{Serialize, Deserialize};
use image::{ImageFormat};
use zstd;
use bincode;
use xxhash_rust::xxh3::xxh3_64;
use std::io::Cursor;
/// Signature written at offset 0 of every storage file and checked on open.
const MAGIC_BYTES: &[u8; 4] = b"USF1";

/// Format version byte written right after the magic signature.
const VERSION: u8 = 1;

/// Largest number of input bytes that go into a single block (64 KiB).
const BLOCK_SIZE: usize = 64 * 1024;

/// Chunks shorter than this many bytes are stored without attempting compression.
const MIN_COMPRESS_SIZE: usize = 1024;
/// Kind of content a caller declares when storing a value.
///
/// The declared type is copied into every block header and is consulted when
/// a chunk is compressed.
// NOTE(review): this enum derives Serialize/Deserialize and is written to the
// storage file as part of each block header; reordering or removing variants
// presumably changes the stored encoding — confirm before editing.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub enum DataType {
/// Textual content.
Text,
/// Opaque bytes with no declared structure.
Binary,
/// Image bytes.
Image,
/// JSON content.
Json,
/// Structured content; input that parses as a bincode `Vec<i64>` is a
/// candidate for delta encoding.
Structured,
}
/// Per-block header, serialized with bincode and written ahead of the block's
/// payload.
#[derive(Serialize, Deserialize, Debug)]
struct BlockHeader {
// Content type declared by the caller for the value this block belongs to.
data_type: DataType,
// Length in bytes of the input chunk before any compression.
original_size: u64,
// Length in bytes of the payload as stored; the reader uses it to size the
// payload read.
compressed_size: u64,
// How the stored payload was produced from the input chunk.
compression_method: CompressionMethod,
// xxh3-64 hash of the stored payload (computed after compression).
checksum: u64,
// Wall-clock time (UTC) at which the block was prepared.
timestamp: DateTime<Utc>,
}
/// Encoding recorded in a block header for the stored payload.
// NOTE(review): serialized into every block header; reordering variants
// presumably changes the stored encoding — confirm before editing.
#[derive(Serialize, Deserialize, Debug, Clone)]
enum CompressionMethod {
/// No compression method is recorded for the payload.
None,
/// Payload was produced by zstd.
Zstd,
/// Payload is a delta-encoded sequence of little-endian `i64` values.
DeltaEncoding,
}
/// File-level metadata, serialized with bincode and stored in the file
/// alongside the blocks.
#[derive(Serialize, Deserialize, Debug)]
struct MetaData {
// Time (UTC) at which the storage file was created.
created: DateTime<Utc>,
// Time (UTC) of the most recent successful store.
modified: DateTime<Utc>,
// Running count of blocks written; each store adds the number of blocks it
// wrote and nothing in the visible code decrements it.
total_blocks: u64,
// Maps a key to the location of the first block stored for that key.
index: HashMap<String, BlockLocation>,
}
/// Position and size of one block record inside the storage file.
#[derive(Serialize, Deserialize, Debug, Clone)]
struct BlockLocation {
// Byte offset from the start of the file at which the block record begins.
offset: u64,
// Length in bytes of the serialized block header.
header_size: u32,
// Length in bytes of the block's stored payload.
data_size: u64,
}
/// Handle to a storage file together with an in-memory copy of its metadata.
///
/// Instances are obtained from `UniversalStorage::create` or
/// `UniversalStorage::open`.
pub struct UniversalStorage {
// Underlying file; all block and metadata I/O goes through this handle.
file: File,
// In-memory metadata (timestamps, block count, key index) for `file`.
metadata: MetaData,
}
/// Storage engine for the single-file `USF1` container.
///
/// # On-disk layout
///
/// ```text
/// offset 0    magic "USF1"                          4 bytes
/// offset 4    format version                        1 byte
/// offset 5    offset of the metadata trailer        u64, little-endian
/// offset 13   block records, written back to back
/// trailer     u64 LE length + bincode(MetaData)     always the last bytes of the file
/// ```
///
/// A block record is
/// `u32 LE header length | bincode(BlockHeader) | u64 LE next-block offset | payload`.
/// A next-block offset of 0 marks the last block of a value.
///
/// The metadata is kept *behind* the blocks and reached through the fixed-size
/// pointer at offset 5. Rewriting a growing metadata blob in place at the front
/// of the file would overwrite the first block as soon as the index gained an
/// entry.
///
/// NOTE(review): this layout is still tagged with `VERSION` 1. Files produced
/// by the previous in-place-metadata layout are expected to fail the trailer
/// consistency check in `open`; consider bumping the version constant.
///
/// Updates are not crash-safe: `store` overwrites the old trailer with the new
/// blocks before the new trailer is written.
impl UniversalStorage {
    /// Position of the little-endian `u64` that points at the metadata trailer.
    const METADATA_POINTER_POS: u64 = 5;
    /// First byte after the fixed header (magic + version + metadata pointer).
    const DATA_START: u64 = 13;
    /// Next-block offset value that terminates a block chain.
    const CHAIN_END: u64 = 0;
    /// zstd level used for every compressed block.
    const ZSTD_LEVEL: i32 = 21;

    /// Creates (or truncates) the file at `path` and initialises an empty store.
    ///
    /// The file is opened for reading *and* writing so that the returned handle
    /// can serve both `store` and `retrieve`.
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while creating or writing the file, or an
    /// `ErrorKind::Other` error if the metadata cannot be serialized.
    pub fn create<P: AsRef<Path>>(path: P) -> io::Result<Self> {
        let mut file = std::fs::OpenOptions::new()
            .read(true)
            .write(true)
            .create(true)
            .truncate(true)
            .open(path)?;

        file.write_all(MAGIC_BYTES)?;
        file.write_all(&[VERSION])?;
        file.write_all(&Self::DATA_START.to_le_bytes())?;

        let now = Utc::now();
        let metadata = MetaData {
            created: now,
            modified: now,
            total_blocks: 0,
            index: HashMap::new(),
        };

        let mut storage = Self { file, metadata };
        storage.update_metadata(Self::DATA_START)?;
        Ok(storage)
    }

    /// Opens an existing store at `path`.
    ///
    /// The file is opened read-write when possible; if that fails the call
    /// falls back to a read-only handle, on which `retrieve` works and `store`
    /// reports the underlying write error.
    ///
    /// # Errors
    ///
    /// * `ErrorKind::InvalidData` if the magic bytes, version, or metadata
    ///   trailer bounds are wrong.
    /// * `ErrorKind::Other` if the metadata bytes cannot be deserialized.
    /// * Any I/O error raised while opening or reading the file.
    pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
        let path = path.as_ref();
        let mut file = std::fs::OpenOptions::new()
            .read(true)
            .write(true)
            .open(path)
            .or_else(|_| File::open(path))?;

        let mut magic = [0u8; 4];
        file.read_exact(&mut magic)?;
        if &magic != MAGIC_BYTES {
            return Err(Self::invalid_data("Invalid file format"));
        }

        let mut version = [0u8; 1];
        file.read_exact(&mut version)?;
        if version[0] != VERSION {
            return Err(Self::invalid_data("Unsupported version"));
        }

        let mut word = [0u8; 8];
        file.read_exact(&mut word)?;
        let metadata_offset = u64::from_le_bytes(word);
        let file_len = file.metadata()?.len();
        if metadata_offset < Self::DATA_START || metadata_offset > file_len {
            return Err(Self::invalid_data("Metadata pointer is out of range"));
        }

        file.seek(SeekFrom::Start(metadata_offset))?;
        file.read_exact(&mut word)?;
        let metadata_size = u64::from_le_bytes(word);

        // The trailer is always the tail of the file, so pointer + prefix +
        // payload must land exactly on the end. This also bounds the
        // allocation below by the real file size.
        let trailer_end = metadata_offset
            .checked_add(8)
            .and_then(|end| end.checked_add(metadata_size));
        if trailer_end != Some(file_len) {
            return Err(Self::invalid_data(
                "Metadata trailer does not match file length",
            ));
        }

        let metadata_bytes = Self::read_bytes(&mut file, metadata_size)?;
        let metadata: MetaData = bincode::deserialize(&metadata_bytes)
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;

        Ok(Self { file, metadata })
    }

    /// Stores `data` under `key`, replacing any value previously indexed there.
    ///
    /// The data is split into blocks of at most `BLOCK_SIZE` bytes which are
    /// written contiguously and linked into a chain; the index records the
    /// first block. Empty `data` is stored as a single empty block so that it
    /// can be retrieved again. Blocks of a replaced value are not reclaimed.
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while writing, or an `ErrorKind::Other`
    /// error if a header or the metadata cannot be serialized.
    pub fn store(&mut self, key: &str, data: &[u8], data_type: DataType) -> io::Result<()> {
        let blocks = self.prepare_blocks(data, data_type)?;

        // New blocks start where the current trailer starts; the trailer is
        // rewritten behind them afterwards.
        let data_end = self.read_metadata_pointer()?;
        self.file.seek(SeekFrom::Start(data_end))?;

        let mut locations = Vec::with_capacity(blocks.len());
        for block in &blocks {
            locations.push(self.write_block(block)?);
        }
        let new_data_end = self.file.stream_position()?;

        if let Some(first) = locations.first() {
            self.metadata.index.insert(key.to_string(), first.clone());
            self.metadata.total_blocks += locations.len() as u64;
            self.metadata.modified = Utc::now();
        }
        self.update_metadata(new_data_end)
    }

    /// Returns the bytes previously stored under `key`.
    ///
    /// Every block of the value is read, checksum-verified, decoded according
    /// to its recorded compression method, and appended to the result.
    ///
    /// # Errors
    ///
    /// * `ErrorKind::NotFound` if `key` is not in the index.
    /// * `ErrorKind::InvalidData` if a checksum, a decoded length, or the block
    ///   chain is inconsistent.
    /// * Any I/O or decoding error raised while reading the blocks.
    pub fn retrieve(&mut self, key: &str) -> io::Result<Vec<u8>> {
        let first = self
            .metadata
            .index
            .get(key)
            .cloned()
            .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "Key not found"))?;

        let mut result = Vec::new();
        let mut current = Some(first);
        while let Some(loc) = current {
            let Block {
                header,
                data,
                next_location,
            } = self.read_block(&loc)?;

            if xxh3_64(&data) != header.checksum {
                return Err(Self::invalid_data("Data corruption detected"));
            }

            let chunk = self.decompress_data(&header.compression_method, data)?;
            if chunk.len() as u64 != header.original_size {
                return Err(Self::invalid_data(
                    "Decoded block length does not match header",
                ));
            }
            result.extend_from_slice(&chunk);

            // Chains are always written front to back; a link that does not
            // move forward can only come from corruption and would loop.
            if let Some(next) = &next_location {
                if next.offset <= loc.offset {
                    return Err(Self::invalid_data("Block chain does not move forward"));
                }
            }
            current = next_location;
        }
        Ok(result)
    }

    /// Splits `data` into blocks of at most `BLOCK_SIZE` input bytes.
    ///
    /// Always yields at least one block (an empty one for empty input). Every
    /// block except the last gets a placeholder `next_location`; its real
    /// offset is only known when the block is written, so `write_block` derives
    /// the link from the presence of the placeholder.
    fn prepare_blocks(&self, data: &[u8], data_type: DataType) -> io::Result<Vec<Block>> {
        let chunks: Vec<&[u8]> = if data.is_empty() {
            vec![data]
        } else {
            data.chunks(BLOCK_SIZE).collect()
        };

        let mut blocks: Vec<Block> = Vec::with_capacity(chunks.len());
        for chunk in chunks {
            let (payload, method) = self.encode_chunk(chunk, &data_type);
            let header = BlockHeader {
                data_type: data_type.clone(),
                original_size: chunk.len() as u64,
                compressed_size: payload.len() as u64,
                compression_method: method,
                checksum: xxh3_64(&payload),
                timestamp: Utc::now(),
            };
            blocks.push(Block {
                header,
                data: payload,
                next_location: None,
            });
        }

        for i in 0..blocks.len().saturating_sub(1) {
            let next_len = blocks[i + 1].data.len() as u64;
            blocks[i].next_location = Some(BlockLocation {
                offset: 0,
                header_size: 0,
                data_size: next_len,
            });
        }
        Ok(blocks)
    }

    /// Chooses the stored form of one chunk.
    ///
    /// Compression is best-effort: chunks below `MIN_COMPRESS_SIZE`, chunks
    /// whose compression fails, and chunks that do not get smaller are stored
    /// as-is with `CompressionMethod::None`.
    fn encode_chunk(&self, chunk: &[u8], data_type: &DataType) -> (Vec<u8>, CompressionMethod) {
        if chunk.len() >= MIN_COMPRESS_SIZE {
            if let Ok((encoded, method)) = self.compress_data(chunk, data_type) {
                if encoded.len() < chunk.len() {
                    return (encoded, method);
                }
            }
        }
        (chunk.to_vec(), CompressionMethod::None)
    }

    /// Compresses `data` losslessly and reports the method used.
    ///
    /// `Structured` input that is exactly a bincode-encoded `Vec<i64>` is
    /// delta-encoded; everything else is compressed with zstd. Image input is
    /// compressed like any other bytes rather than transcoded, so that
    /// `retrieve` returns exactly what was stored.
    fn compress_data(
        &self,
        data: &[u8],
        data_type: &DataType,
    ) -> io::Result<(Vec<u8>, CompressionMethod)> {
        // Exhaustive on purpose: a new `DataType` variant must pick a strategy.
        let delta = match data_type {
            DataType::Structured => self.try_delta_encode(data),
            DataType::Text | DataType::Json | DataType::Binary | DataType::Image => None,
        };
        match delta {
            Some(encoded) => Ok((encoded, CompressionMethod::DeltaEncoding)),
            None => {
                let compressed = zstd::encode_all(data, Self::ZSTD_LEVEL)?;
                Ok((compressed, CompressionMethod::Zstd))
            }
        }
    }

    /// Delta-encodes `data` if, and only if, decoding can reproduce it exactly.
    ///
    /// bincode accepts trailing bytes after the parsed vector, so the parsed
    /// numbers are re-serialized and compared with the input; any mismatch
    /// means delta encoding would lose bytes and `None` is returned.
    fn try_delta_encode(&self, data: &[u8]) -> Option<Vec<u8>> {
        let numbers: Vec<i64> = bincode::deserialize(data).ok()?;
        let canonical = bincode::serialize(&numbers).ok()?;
        if canonical.as_slice() == data {
            Some(self.delta_encode(&numbers))
        } else {
            None
        }
    }

    /// Encodes `numbers` as little-endian `i64` differences.
    ///
    /// The first output value is the first number itself; each following value
    /// is the wrapping difference to its predecessor, so extreme inputs cannot
    /// overflow.
    fn delta_encode(&self, numbers: &[i64]) -> Vec<u8> {
        let mut encoded = Vec::with_capacity(numbers.len() * 8);
        let mut previous = 0i64;
        for &value in numbers {
            encoded.extend_from_slice(&value.wrapping_sub(previous).to_le_bytes());
            previous = value;
        }
        encoded
    }

    /// Inverse of `delta_encode`: rebuilds the numbers from their differences.
    fn delta_decode(&self, encoded: &[u8]) -> io::Result<Vec<i64>> {
        if encoded.len() % 8 != 0 {
            return Err(Self::invalid_data(
                "Delta-encoded block has a truncated value",
            ));
        }
        let mut numbers = Vec::with_capacity(encoded.len() / 8);
        let mut previous = 0i64;
        for raw in encoded.chunks_exact(8) {
            let mut word = [0u8; 8];
            word.copy_from_slice(raw);
            previous = previous.wrapping_add(i64::from_le_bytes(word));
            numbers.push(previous);
        }
        Ok(numbers)
    }

    /// Turns a stored payload back into the original chunk bytes.
    fn decompress_data(&self, method: &CompressionMethod, data: Vec<u8>) -> io::Result<Vec<u8>> {
        match method {
            CompressionMethod::None => Ok(data),
            CompressionMethod::Zstd => zstd::decode_all(data.as_slice()),
            CompressionMethod::DeltaEncoding => {
                let numbers = self.delta_decode(&data)?;
                bincode::serialize(&numbers).map_err(|e| io::Error::new(io::ErrorKind::Other, e))
            }
        }
    }

    /// Writes one block record at the file's current position.
    ///
    /// The caller positions the cursor (see `store`). When the block carries a
    /// `next_location` placeholder, the record's next-block offset is set to
    /// the byte right after this record, which is where the successor is
    /// written.
    fn write_block(&mut self, block: &Block) -> io::Result<BlockLocation> {
        let offset = self.file.stream_position()?;
        let header_bytes = bincode::serialize(&block.header)
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
        let header_size = <u32 as std::convert::TryFrom<usize>>::try_from(header_bytes.len())
            .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "Block header is too large"))?;

        let record_len = 4 + header_bytes.len() + 8 + block.data.len();
        let next_offset = if block.next_location.is_some() {
            offset + record_len as u64
        } else {
            Self::CHAIN_END
        };

        // Assemble the record first so it reaches the file in a single write.
        let mut record = Vec::with_capacity(record_len);
        record.extend_from_slice(&header_size.to_le_bytes());
        record.extend_from_slice(&header_bytes);
        record.extend_from_slice(&next_offset.to_le_bytes());
        record.extend_from_slice(&block.data);
        self.file.write_all(&record)?;

        Ok(BlockLocation {
            offset,
            header_size,
            data_size: block.data.len() as u64,
        })
    }

    /// Reads the block record that starts at `location.offset`.
    ///
    /// Only `offset` is required. When `location.header_size` is non-zero (an
    /// index entry rather than a chain link) the recorded sizes are checked
    /// against the record. The returned `next_location` carries only the
    /// successor's offset; its size fields are left at 0.
    fn read_block(&mut self, location: &BlockLocation) -> io::Result<Block> {
        self.file.seek(SeekFrom::Start(location.offset))?;

        let mut size_bytes = [0u8; 4];
        self.file.read_exact(&mut size_bytes)?;
        let header_size = u32::from_le_bytes(size_bytes);
        let header_bytes = Self::read_bytes(&mut self.file, u64::from(header_size))?;
        let header: BlockHeader = bincode::deserialize(&header_bytes)
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;

        if location.header_size != 0
            && (location.header_size != header_size
                || location.data_size != header.compressed_size)
        {
            return Err(Self::invalid_data(
                "Block record does not match its index entry",
            ));
        }

        let mut link_bytes = [0u8; 8];
        self.file.read_exact(&mut link_bytes)?;
        let next_offset = u64::from_le_bytes(link_bytes);

        let data = Self::read_bytes(&mut self.file, header.compressed_size)?;

        let next_location = if next_offset == Self::CHAIN_END {
            None
        } else {
            Some(BlockLocation {
                offset: next_offset,
                header_size: 0,
                data_size: 0,
            })
        };

        Ok(Block {
            header,
            data,
            next_location,
        })
    }

    /// Writes the metadata trailer at `metadata_offset` and points the header at it.
    ///
    /// The file is truncated to end exactly at the trailer, which is the
    /// invariant `open` verifies. The header pointer is updated last.
    fn update_metadata(&mut self, metadata_offset: u64) -> io::Result<()> {
        let metadata_bytes = bincode::serialize(&self.metadata)
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;

        let mut trailer = Vec::with_capacity(8 + metadata_bytes.len());
        trailer.extend_from_slice(&(metadata_bytes.len() as u64).to_le_bytes());
        trailer.extend_from_slice(&metadata_bytes);

        self.file.seek(SeekFrom::Start(metadata_offset))?;
        self.file.write_all(&trailer)?;
        self.file.set_len(metadata_offset + trailer.len() as u64)?;

        self.file
            .seek(SeekFrom::Start(Self::METADATA_POINTER_POS))?;
        self.file.write_all(&metadata_offset.to_le_bytes())?;
        Ok(())
    }

    /// Reads the header pointer, i.e. the offset at which the trailer starts
    /// and the block region ends.
    fn read_metadata_pointer(&mut self) -> io::Result<u64> {
        self.file
            .seek(SeekFrom::Start(Self::METADATA_POINTER_POS))?;
        let mut word = [0u8; 8];
        self.file.read_exact(&mut word)?;
        Ok(u64::from_le_bytes(word))
    }

    /// Reads exactly `len` bytes from the current position of `file`.
    ///
    /// `len` comes from the file itself, so it is checked against the bytes
    /// actually remaining before anything is allocated.
    fn read_bytes(file: &mut File, len: u64) -> io::Result<Vec<u8>> {
        let position = file.stream_position()?;
        let remaining = file.metadata()?.len().saturating_sub(position);
        if len > remaining {
            return Err(Self::invalid_data(
                "Record extends past the end of the file",
            ));
        }
        let len = <usize as std::convert::TryFrom<u64>>::try_from(len)
            .map_err(|_| Self::invalid_data("Record is too large for this platform"))?;
        let mut bytes = vec![0u8; len];
        file.read_exact(&mut bytes)?;
        Ok(bytes)
    }

    /// Builds the `InvalidData` error used for every format violation.
    fn invalid_data(message: &'static str) -> io::Error {
        io::Error::new(io::ErrorKind::InvalidData, message)
    }
}
/// In-memory form of one block: its header, its stored payload, and an
/// optional link to the block that follows it.
#[derive(Debug)]
struct Block {
// Header describing the payload (sizes, compression method, checksum).
header: BlockHeader,
// Payload bytes as stored on disk, i.e. after any compression; the header
// checksum is computed over these bytes.
data: Vec<u8>,
// Link to the following block of the same value, if any.
// NOTE(review): block preparation fills this with a placeholder whose
// offset is 0 — confirm how readers are expected to resolve it.
next_location: Option<BlockLocation>,
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// A short text value should come back unchanged from a fresh store.
    #[test]
    fn test_basic_storage() -> io::Result<()> {
        let scratch = tempdir()?;
        let mut storage = UniversalStorage::create(scratch.path().join("test.usf"))?;

        let payload: &[u8] = "Hello, World!".as_bytes();
        storage.store("greeting", payload, DataType::Text)?;

        let round_tripped = storage.retrieve("greeting")?;
        assert_eq!(payload, round_tripped.as_slice());
        Ok(())
    }

    /// A value spanning three full blocks should come back unchanged.
    #[test]
    fn test_large_data() -> io::Result<()> {
        let scratch = tempdir()?;
        let mut storage = UniversalStorage::create(scratch.path().join("test_large.usf"))?;

        // Repeating 0..=255 ramp, exactly three blocks long.
        let large_data: Vec<u8> = (0u8..=255).cycle().take(BLOCK_SIZE * 3).collect();
        storage.store("large", &large_data, DataType::Binary)?;

        let round_tripped = storage.retrieve("large")?;
        assert_eq!(large_data, round_tripped);
        Ok(())
    }
}