ring-db 0.3.0

A Rust library for ring queries in high-dimensional vector spaces.
Documentation
/// Binary file-format utilities for ring-db persistence.
///
/// All numeric values are stored in **little-endian** byte order so the format
/// is portable across architectures.
///
/// ### Layout of a persisted database folder
///
/// | File | Content |
/// |------|---------|
/// | `meta.bin` | `dims` (u64 LE) + `n_vectors` (u64 LE) = 16 bytes |
/// | `vectors.bin` | `n_vectors × dims` f32 values in row-major order |
/// | `norms_sq.bin` | `n_vectors` precomputed squared L2 norms (f32) |
/// | `payloads.bin` | concatenated bincode bytes for every payload |
/// | `offsets.bin` | `n_vectors + 1` byte offsets (u64) into `payloads.bin` |
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;

use crate::error::{Result, RingDbError};

// ── Meta ─────────────────────────────────────────────────────────────────────

/// Write `(dims, n_vectors)` as two little-endian u64 values.
pub fn write_meta(path: &Path, dims: usize, n_vectors: usize) -> Result<()> {
    let mut f = File::create(path)?;
    f.write_all(&(dims as u64).to_le_bytes())?;
    f.write_all(&(n_vectors as u64).to_le_bytes())?;
    Ok(())
}

/// Read `(dims, n_vectors)` from the meta file.
pub fn read_meta(path: &Path) -> Result<(usize, usize)> {
    let bytes = std::fs::read(path)?;
    if bytes.len() < 16 {
        return Err(RingDbError::Corrupt(format!(
            "meta file '{}' is too short ({} bytes, expected ≥ 16)",
            path.display(),
            bytes.len()
        )));
    }
    let dims = u64::from_le_bytes(bytes[0..8].try_into().unwrap()) as usize;
    let n_vectors = u64::from_le_bytes(bytes[8..16].try_into().unwrap()) as usize;
    Ok((dims, n_vectors))
}

// ── f32 vectors ──────────────────────────────────────────────────────────────

/// Write a `&[f32]` slice as raw little-endian bytes.
pub fn write_f32_file(path: &Path, data: &[f32]) -> Result<()> {
    // Use a large BufWriter so we make a small number of write syscalls even
    // for datasets with millions of vectors.
    let mut w = BufWriter::with_capacity(1 << 20, File::create(path)?);
    for &x in data {
        w.write_all(&x.to_le_bytes())?;
    }
    Ok(())
}

/// Read a file of raw little-endian f32 values into a `Vec<f32>`.
pub fn read_f32_file(path: &Path) -> Result<Vec<f32>> {
    let bytes = std::fs::read(path)?;
    if bytes.len() % 4 != 0 {
        return Err(RingDbError::Corrupt(format!(
            "f32 file '{}' has {} bytes (not a multiple of 4)",
            path.display(),
            bytes.len()
        )));
    }
    Ok(bytes
        .chunks_exact(4)
        .map(|b| f32::from_le_bytes(b.try_into().unwrap()))
        .collect())
}

// ── u64 offsets ──────────────────────────────────────────────────────────────

/// Write a `&[u64]` slice as raw little-endian bytes.
pub fn write_u64_file(path: &Path, data: &[u64]) -> Result<()> {
    let mut w = BufWriter::with_capacity(1 << 20, File::create(path)?);
    for &x in data {
        w.write_all(&x.to_le_bytes())?;
    }
    Ok(())
}

/// Read a file of raw little-endian u64 values into a `Vec<u64>`.
pub fn read_u64_file(path: &Path) -> Result<Vec<u64>> {
    let bytes = std::fs::read(path)?;
    if bytes.len() % 8 != 0 {
        return Err(RingDbError::Corrupt(format!(
            "u64 file '{}' has {} bytes (not a multiple of 8)",
            path.display(),
            bytes.len()
        )));
    }
    Ok(bytes
        .chunks_exact(8)
        .map(|b| u64::from_le_bytes(b.try_into().unwrap()))
        .collect())
}

// ── File move ────────────────────────────────────────────────────────────────

/// Move `src` to `dst`.
///
/// Tries an atomic `rename` first (O(1) on the same filesystem). Falls back to
/// a copy-then-delete if the two paths span different mount points.
pub fn move_file(src: &Path, dst: &Path) -> Result<()> {
    match std::fs::rename(src, dst) {
        Ok(()) => Ok(()),
        Err(_) => {
            std::fs::copy(src, dst)?;
            std::fs::remove_file(src)?;
            Ok(())
        }
    }
}