use memmap2::Mmap;
use serde::{Deserialize, Serialize};
use std::fs::{File, OpenOptions};
use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
use std::path::Path;
use thiserror::Error;
/// Errors produced by the storage layer.
///
/// The `#[from]` attributes let `?` convert the underlying error types into
/// this enum automatically.
#[derive(Error, Debug)]
pub enum StorageError {
/// An operating-system level I/O failure (open, read, write, seek, map).
#[error("IO error: {0}")]
Io(#[from] io::Error),
/// A record could not be converted to or from its JSON representation.
#[error("Serialization error: {0}")]
Serde(#[from] serde_json::Error),
/// The data is structurally invalid; the payload is a human-readable
/// description of the problem.
#[error("Invalid format: {0}")]
InvalidFormat(String),
}
/// Convenience alias: a `std::result::Result` whose error type is fixed to
/// [`StorageError`].
pub type Result<T> = std::result::Result<T, StorageError>;
/// A single stored entry: an identifier, a float vector and optional extras.
///
/// Derives `Serialize`/`Deserialize`, so the field order below is also the
/// order in which fields are emitted when the record is serialized.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Record {
/// Caller-chosen identifier for the record.
pub id: String,
/// The vector payload as 32-bit floats.
pub vector: Vec<f32>,
/// Optional free-form JSON attached to the record.
pub metadata: Option<serde_json::Value>,
/// Optional timestamp.
// NOTE(review): unit/epoch is not established anywhere in this file — confirm with callers.
pub timestamp: Option<u64>,
}
/// A file-backed collection of [`Record`]s.
///
/// Holds the open file handle, a memory map of that file, and an in-memory
/// copy of every record that has been loaded or appended.
pub struct Storage {
/// Handle to the backing file, opened for both reading and writing.
pub file: File,
/// Memory map of `file`; it is re-created after every append so that it
/// covers the newly written bytes.
pub mmap: Mmap,
/// All records currently known, in file order.
pub records: Vec<Record>,
}
impl Storage {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
let path = path.as_ref();
let file_exists = path.exists();
let mut file = OpenOptions::new()
.read(true)
.write(true)
.create(true).truncate(false)
.open(path)?;
let mut records = Vec::new();
if file_exists && file.metadata()?.len() > 0 {
file.seek(SeekFrom::Start(0))?;
let mut content = String::new();
file.read_to_string(&mut content)?;
for line in content.lines() {
if !line.is_empty() {
let record: Record = serde_json::from_str(line)?;
records.push(record);
}
}
}
file.seek(SeekFrom::Start(0))?;
let mmap = unsafe { Mmap::map(&file)? };
Ok(Self {
file,
mmap,
records,
})
}
pub fn append(&mut self, record: Record) -> Result<()> {
self.file.seek(SeekFrom::End(0))?;
{
let mut writer = BufWriter::new(&mut self.file);
let line = serde_json::to_string(&record)? + "\n";
writer.write_all(line.as_bytes())?;
writer.flush()?;
}
self.records.push(record);
self.mmap = unsafe { Mmap::map(&self.file)? };
Ok(())
}
pub fn append_batch(&mut self, records: Vec<Record>) -> Result<()> {
self.file.seek(SeekFrom::End(0))?;
{
let mut writer = BufWriter::new(&mut self.file);
for record in records {
let line = serde_json::to_string(&record)? + "\n";
writer.write_all(line.as_bytes())?;
self.records.push(record);
}
writer.flush()?;
}
self.mmap = unsafe { Mmap::map(&self.file)? };
Ok(())
}
pub fn stats(&self) -> Result<(usize, usize)> {
Ok((self.records.len(), self.file.metadata()?.len() as usize))
}
}