pub mod config;
pub mod stats;
pub mod buffer;
pub mod basic_cache;
#[cfg(test)]
mod simple_tests;
pub use config::*;
pub use stats::*;
pub use buffer::*;
pub use basic_cache::{LruPageCache, SingleLruPageCache};
use crate::error::{Result, ZiporaError};
use std::collections::HashMap;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::RwLock;
/// Base-2 logarithm of [`PAGE_SIZE`] (`1 << 12 == 4096`).
pub const PAGE_BITS: usize = 12;
/// Size in bytes of one cache page; page reads in this module operate on
/// buffers of exactly this length.
pub const PAGE_SIZE: usize = 1 << PAGE_BITS;
/// 2 MiB. Presumably the huge-page granularity used by allocation code
/// elsewhere in the crate; not referenced in this file.
pub const HUGE_PAGE_SIZE: usize = 2 * 1024 * 1024;
/// 64. Presumably the upper bound on the number of cache shards; verify
/// against the configuration code that consumes it.
pub const MAX_SHARDS: usize = 64;
/// 64 bytes. Presumably used for cache-line alignment/padding elsewhere in
/// the crate; not referenced in this file.
pub const CACHE_LINE_SIZE: usize = 64;
/// Failure conditions reported by the cache layer.
///
/// Each variant renders (via `Display`) to a fixed, human-readable message.
#[derive(Debug, Clone, PartialEq)]
pub enum CacheError {
    /// Rendered as "Cache is full".
    CacheFull,
    /// Rendered as "Invalid page size or alignment".
    InvalidPageSize,
    /// Rendered as "File not found in cache".
    FileNotFound,
    /// Rendered as "Invalid shard configuration".
    InvalidShardConfig,
    /// Rendered as "Memory allocation failed".
    AllocationFailed,
    /// Rendered as "Required hardware feature not available".
    HardwareUnsupported,
}

impl std::fmt::Display for CacheError {
    /// Writes the fixed message associated with the variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the text first, then emit it in one place.
        let message = match self {
            CacheError::CacheFull => "Cache is full",
            CacheError::InvalidPageSize => "Invalid page size or alignment",
            CacheError::FileNotFound => "File not found in cache",
            CacheError::InvalidShardConfig => "Invalid shard configuration",
            CacheError::AllocationFailed => "Memory allocation failed",
            CacheError::HardwareUnsupported => "Required hardware feature not available",
        };
        f.write_str(message)
    }
}

// Marker impl: `Debug` + `Display` above supply everything the trait needs.
impl std::error::Error for CacheError {}
impl From<CacheError> for ZiporaError {
fn from(err: CacheError) -> Self {
ZiporaError::invalid_data(err.to_string())
}
}
/// Classification of a cache access outcome.
///
/// The explicit `u8` discriminants (0..=6) double as array indices through
/// [`CacheHitType::as_index`]. The precise meaning of each outcome is defined
/// by the cache implementation, which is not visible in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum CacheHitType {
    /// Described as "Cache Hit".
    Hit = 0,
    /// Described as "Evicted Others".
    EvictedOthers = 1,
    /// Described as "Initial Free".
    InitialFree = 2,
    /// Described as "Dropped Free".
    DroppedFree = 3,
    /// Described as "Hit Others Load".
    HitOthersLoad = 4,
    /// Described as "Mixed Operation".
    Mix = 5,
    /// Described as "Cache Miss".
    Miss = 6,
}

impl CacheHitType {
    /// Returns the variant's discriminant widened to `usize`.
    pub fn as_index(self) -> usize {
        self as usize
    }

    /// Returns a short, fixed, human-readable label for the variant.
    pub fn description(self) -> &'static str {
        match self {
            Self::Hit => "Cache Hit",
            Self::EvictedOthers => "Evicted Others",
            Self::InitialFree => "Initial Free",
            Self::DroppedFree => "Dropped Free",
            Self::HitOthersLoad => "Hit Others Load",
            Self::Mix => "Mixed Operation",
            Self::Miss => "Cache Miss",
        }
    }
}
/// Identifier of an open file; issued by `FileManager::open_file`.
pub type FileId = u32;
/// Zero-based index of a `PAGE_SIZE`-sized page within a file.
pub type PageId = u32;
/// Index of a node; presumably a slot in the cache's node storage, which is
/// defined outside this file.
pub type NodeIndex = u32;
/// Sentinel [`NodeIndex`] (`u32::MAX`) meaning "no node".
pub const INVALID_NODE: NodeIndex = NodeIndex::MAX;
/// Mixes a `(file_id, page_id)` pair into a 64-bit hash.
///
/// `file_id` occupies the upper 32 bits and `page_id` the lower 32 bits of a
/// combined key; the key is rotated left by three bits and its byte order is
/// then reversed.
#[inline]
pub fn hash_file_page(file_id: FileId, page_id: PageId) -> u64 {
    let key = (u64::from(file_id) << 32) | u64::from(page_id);
    key.rotate_left(3).swap_bytes()
}
/// Maps a `(file_id, page_id)` pair to a shard index.
///
/// The result is `hash_file_page(file_id, page_id) % num_shards`, so it lies
/// in `0..num_shards`.
///
/// A `num_shards` of zero is treated as a single shard and yields `0`;
/// previously it panicked with a remainder-by-zero inside this helper.
#[inline]
pub fn get_shard_id(file_id: FileId, page_id: PageId, num_shards: u32) -> u32 {
    let shard_count = u64::from(num_shards.max(1));
    // The remainder is strictly below `shard_count <= u32::MAX`, so the
    // narrowing cast cannot lose information.
    (hash_file_page(file_id, page_id) % shard_count) as u32
}
/// Issues a best-effort cache prefetch for the memory at `ptr`.
///
/// * x86_64: `_mm_prefetch` with the `_MM_HINT_T0` hint.
/// * aarch64: `__pldl1keep`.
/// * any other architecture: no-op.
#[inline]
pub fn prefetch_hint(ptr: *const u8) {
    #[cfg(target_arch = "x86_64")]
    {
        use std::arch::x86_64::{_mm_prefetch, _MM_HINT_T0};
        // SAFETY: a prefetch is only a hint to the cache hierarchy; the
        // pointer is never dereferenced from Rust's point of view.
        unsafe { _mm_prefetch(ptr as *const i8, _MM_HINT_T0) };
    }

    #[cfg(target_arch = "aarch64")]
    {
        // NOTE(review): confirm that `__pldl1keep` is exported by
        // `std::arch::aarch64` on the toolchain in use; this branch is only
        // compiled on aarch64 targets.
        // SAFETY: same reasoning as the x86_64 branch.
        unsafe { std::arch::aarch64::__pldl1keep(ptr) };
    }

    // Keeps the parameter "used" on architectures with no prefetch branch.
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    let _ = ptr;
}
#[derive(Debug)]
pub struct FileManager {
files: RwLock<HashMap<FileId, FileEntry>>,
next_file_id: AtomicU32,
}
#[derive(Debug)]
struct FileEntry {
file: File,
path: PathBuf,
size: u64,
}
impl FileManager {
pub fn new() -> Self {
Self {
files: RwLock::new(HashMap::new()),
next_file_id: AtomicU32::new(1), }
}
pub fn open_file<P: AsRef<Path>>(&self, path: P) -> Result<FileId> {
let path = path.as_ref().to_path_buf();
let file = File::open(&path)
.map_err(|e| ZiporaError::invalid_data(format!("Failed to open file {:?}: {}", path, e)))?;
let size = file.metadata()
.map_err(|e| ZiporaError::invalid_data(format!("Failed to get file metadata {:?}: {}", path, e)))?
.len();
let file_id = self.next_file_id.fetch_add(1, Ordering::Relaxed);
let entry = FileEntry { file, path, size };
let mut files = self.files.write()
.map_err(|_| ZiporaError::invalid_data("FileManager lock poisoned".to_string()))?;
files.insert(file_id, entry);
Ok(file_id)
}
pub fn file_size(&self, file_id: FileId) -> Result<u64> {
let files = self.files.read()
.map_err(|_| ZiporaError::invalid_data("FileManager lock poisoned".to_string()))?;
files.get(&file_id)
.map(|entry| entry.size)
.ok_or_else(|| ZiporaError::invalid_data(format!("File ID {} not found", file_id)))
}
pub fn offset_to_page_id(offset: u64) -> PageId {
(offset / PAGE_SIZE as u64) as PageId
}
pub fn offset_within_page(offset: u64) -> usize {
(offset % PAGE_SIZE as u64) as usize
}
pub fn page_aligned_offset(page_id: PageId) -> u64 {
(page_id as u64) * (PAGE_SIZE as u64)
}
pub fn read_page(&self, file_id: FileId, page_id: PageId, buffer: &mut [u8]) -> Result<usize> {
if buffer.len() != PAGE_SIZE {
return Err(ZiporaError::invalid_data(format!(
"Buffer size {} != PAGE_SIZE {}", buffer.len(), PAGE_SIZE
)));
}
let mut files = self.files.write()
.map_err(|_| ZiporaError::invalid_data("FileManager lock poisoned".to_string()))?;
let entry = files.get_mut(&file_id)
.ok_or_else(|| ZiporaError::invalid_data(format!("File ID {} not found", file_id)))?;
let offset = Self::page_aligned_offset(page_id);
if offset >= entry.size {
return Ok(0); }
entry.file.seek(SeekFrom::Start(offset))
.map_err(|e| ZiporaError::invalid_data(format!(
"Failed to seek to offset {} in file {:?}: {}", offset, entry.path, e
)))?;
let bytes_to_read = std::cmp::min(PAGE_SIZE, (entry.size - offset) as usize);
let mut bytes_read = 0;
while bytes_read < bytes_to_read {
match entry.file.read(&mut buffer[bytes_read..bytes_to_read]) {
Ok(0) => break, Ok(n) => bytes_read += n,
Err(e) => return Err(ZiporaError::invalid_data(format!(
"Failed to read from file {:?} at offset {}: {}", entry.path, offset, e
))),
}
}
if bytes_read < PAGE_SIZE {
buffer[bytes_read..].fill(0);
}
Ok(bytes_read)
}
pub fn read_data(&self, file_id: FileId, offset: u64, length: usize, buffer: &mut [u8]) -> Result<usize> {
if buffer.len() < length {
return Err(ZiporaError::invalid_data(format!(
"Buffer size {} < requested length {}", buffer.len(), length
)));
}
let files = self.files.read()
.map_err(|_| ZiporaError::invalid_data("FileManager lock poisoned".to_string()))?;
let entry = files.get(&file_id)
.ok_or_else(|| ZiporaError::invalid_data(format!("File ID {} not found", file_id)))?;
if offset >= entry.size {
return Ok(0);
}
let bytes_to_read = std::cmp::min(length, (entry.size - offset) as usize);
drop(files);
let mut files = self.files.write()
.map_err(|_| ZiporaError::invalid_data("FileManager lock poisoned".to_string()))?;
let entry = files.get_mut(&file_id)
.ok_or_else(|| ZiporaError::invalid_data(format!("File ID {} not found", file_id)))?;
entry.file.seek(SeekFrom::Start(offset))
.map_err(|e| ZiporaError::invalid_data(format!(
"Failed to seek to offset {} in file {:?}: {}", offset, entry.path, e
)))?;
let mut bytes_read = 0;
while bytes_read < bytes_to_read {
match entry.file.read(&mut buffer[bytes_read..bytes_to_read]) {
Ok(0) => break,
Ok(n) => bytes_read += n,
Err(e) => return Err(ZiporaError::invalid_data(format!(
"Failed to read from file {:?} at offset {}: {}", entry.path, offset, e
))),
}
}
Ok(bytes_read)
}
pub fn close_file(&self, file_id: FileId) -> Result<()> {
let mut files = self.files.write()
.map_err(|_| ZiporaError::invalid_data("FileManager lock poisoned".to_string()))?;
files.remove(&file_id)
.ok_or_else(|| ZiporaError::invalid_data(format!("File ID {} not found", file_id)))?;
Ok(())
}
}