//! veclite-storage 1.0.0
//!
//! Storage engine for VecLite.
//!
//! Documentation
use memmap2::Mmap;
use serde::{Deserialize, Serialize};
use std::fs::{File, OpenOptions};
use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
use std::path::Path;
use thiserror::Error;

/// Errors produced by the storage layer.
///
/// The `#[from]` attributes generate `From` conversions, so I/O and
/// serde_json failures can be propagated with `?` from functions returning
/// this module's [`Result`].
#[derive(Error, Debug)]
pub enum StorageError {
    /// An underlying file-system or I/O operation failed.
    #[error("IO error: {0}")]
    Io(#[from] io::Error),
    /// A record could not be encoded to, or decoded from, JSON.
    #[error("Serialization error: {0}")]
    Serde(#[from] serde_json::Error),
    /// The stored data is malformed; the payload is a human-readable reason.
    // NOTE(review): nothing in the code visible here constructs this variant —
    // confirm it is used elsewhere before relying on it.
    #[error("Invalid format: {0}")]
    InvalidFormat(String),
}

/// Convenience alias for results whose error type is [`StorageError`].
pub type Result<T> = std::result::Result<T, StorageError>;

/// A single stored entry: an identifier, its vector, and optional extras.
///
/// Derives `Serialize`/`Deserialize`; [`Storage`] encodes each record as one
/// JSON document per line of the backing file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Record {
    /// Identifier of the record.
    // NOTE(review): uniqueness is not enforced by the code visible here.
    pub id: String,
    /// The vector payload as 32-bit floats.
    pub vector: Vec<f32>,
    /// Optional free-form JSON attached to the record.
    pub metadata: Option<serde_json::Value>,
    /// Optional timestamp.
    // NOTE(review): unit and epoch are not established by this file — confirm
    // with the producers of this value.
    pub timestamp: Option<u64>,
}

/// A file-backed record store that also keeps every record in memory.
pub struct Storage {
    /// Handle to the backing file, opened for both reading and writing.
    pub file: File,
    /// Read-only memory map of the backing file; it is re-created after each
    /// append so that it covers the newly written bytes.
    pub mmap: Mmap,
    /// All records loaded when the store was opened plus those appended since,
    /// in file order.
    pub records: Vec<Record>,
}

impl Storage {
    /// Opens the record log at `path`, creating an empty one if it is missing.
    ///
    /// The file is interpreted as newline-delimited JSON: every line that is
    /// not blank must decode to a [`Record`]. All decoded records are kept in
    /// memory and the file is memory-mapped read-only.
    ///
    /// # Errors
    ///
    /// * [`StorageError::Io`] if the file cannot be opened, read (including
    ///   content that is not valid UTF-8), or mapped.
    /// * [`StorageError::Serde`] if a non-blank line is not a valid record.
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
        let mut file = OpenOptions::new()
            .read(true)
            .write(true)
            .create(true)
            .truncate(false)
            .open(path.as_ref())?;

        // Read through the handle we just opened instead of probing the path
        // beforehand: a separate existence check races with other processes
        // and could make a freshly created, non-empty file look empty.
        let mut content = String::new();
        file.read_to_string(&mut content)?;

        let records = content
            .lines()
            // Blank and whitespace-only lines carry no record; skip them
            // rather than failing the whole load.
            .filter(|line| !line.trim().is_empty())
            .map(serde_json::from_str::<Record>)
            .collect::<std::result::Result<Vec<_>, _>>()?;

        // Leave the cursor at the start, as callers of the public `file`
        // field may expect a freshly opened handle.
        file.seek(SeekFrom::Start(0))?;

        // SAFETY: the mapping is read-only and backed by the handle owned by
        // the returned value. It stays valid only while no other party
        // truncates the file; that cannot be enforced from here and is a
        // requirement on how the file is used.
        let mmap = unsafe { Mmap::map(&file)? };

        Ok(Self {
            file,
            mmap,
            records,
        })
    }

    /// Appends one record to the file and to the in-memory list.
    ///
    /// The record is added to `records` only after it has been written and
    /// flushed, so the in-memory list never contains unpersisted entries.
    ///
    /// # Errors
    ///
    /// Returns [`StorageError::Serde`] if encoding fails and
    /// [`StorageError::Io`] if writing or re-mapping the file fails.
    pub fn append(&mut self, record: Record) -> Result<()> {
        let mut payload = Vec::new();
        Self::encode_line(&record, &mut payload)?;
        self.write_payload(&payload)?;
        self.records.push(record);
        self.remap()
    }

    /// Appends several records with a single buffered write.
    ///
    /// Every record is encoded before anything touches the file, so an
    /// encoding failure writes nothing. The records are added to `records`
    /// only after the write has been flushed successfully.
    ///
    /// # Errors
    ///
    /// Returns [`StorageError::Serde`] if encoding fails and
    /// [`StorageError::Io`] if writing or re-mapping the file fails.
    pub fn append_batch(&mut self, records: Vec<Record>) -> Result<()> {
        let mut payload = Vec::new();
        for record in &records {
            Self::encode_line(record, &mut payload)?;
        }
        self.write_payload(&payload)?;
        self.records.extend(records);
        self.remap()
    }

    /// Returns `(record_count, file_size_in_bytes)`.
    ///
    /// # Errors
    ///
    /// Returns [`StorageError::Io`] if the file metadata cannot be read.
    pub fn stats(&self) -> Result<(usize, usize)> {
        let bytes = self.file.metadata()?.len();
        // Saturate instead of silently wrapping on targets where `usize` is
        // narrower than the 64-bit file length.
        let bytes = bytes.min(usize::MAX as u64) as usize;
        Ok((self.records.len(), bytes))
    }

    /// Encodes `record` as one JSON line (terminated by `\n`) onto `buf`.
    fn encode_line(record: &Record, buf: &mut Vec<u8>) -> Result<()> {
        serde_json::to_writer(&mut *buf, record)?;
        buf.push(b'\n');
        Ok(())
    }

    /// Writes `payload` at the end of the file and flushes it.
    fn write_payload(&mut self, payload: &[u8]) -> Result<()> {
        if payload.is_empty() {
            return Ok(());
        }

        // If the existing content does not end with a newline, writing
        // directly after it would glue the new record onto the last line and
        // make the file unparseable; start on a fresh line instead.
        let needs_separator = matches!(self.mmap.last(), Some(&byte) if byte != b'\n');

        self.file.seek(SeekFrom::End(0))?;
        let mut writer = BufWriter::new(&mut self.file);
        if needs_separator {
            writer.write_all(b"\n")?;
        }
        writer.write_all(payload)?;
        // Flush explicitly: dropping a `BufWriter` discards flush errors.
        writer.flush()?;
        Ok(())
    }

    /// Re-creates the memory map so it covers the file's current length.
    fn remap(&mut self) -> Result<()> {
        // SAFETY: same contract as the mapping created in `open` — read-only,
        // backed by the handle this value owns, and valid only while nobody
        // truncates the file underneath it.
        self.mmap = unsafe { Mmap::map(&self.file)? };
        Ok(())
    }
}