syntheca 0.1.0

Content-addressable storage on top of apotheca. Bytes go in, BLAKE3 hash comes out.
Documentation
//! syntheca — content-addressable storage on top of apotheca. See SPEC.md
//! for the protocol.
//!
//! Phase 1: BLAKE3 fixed as the hash function, single apotheca pool, three
//! operations (`put`, `get`, `stat`), verify-on-read defaulting on.
//! Multi-pool composition, alternative hashes, and configuration are
//! deferred (SPEC §8).

mod hash;

pub use hash::{Hash, HashParseError};

use std::io;
use std::path::Path;

/// Knobs accepted when constructing a [`Pool`]. SPEC §4.
#[derive(Debug, Clone, Copy)]
pub struct Options {
    /// When `true`, every `get` recomputes `blake3(bytes)` and fails with
    /// `IntegrityError` if the result differs from the requested hash.
    /// This is in addition to apotheca's own SHA-256 verification, which
    /// runs regardless of this flag. Default: `true`.
    pub verify_on_read: bool,
}

impl Default for Options {
    /// Default options: BLAKE3 verification on read is switched on.
    fn default() -> Self {
        Options {
            verify_on_read: true,
        }
    }
}

/// Metadata returned by `stat` (SPEC §2.3). The BLAKE3 hash is the input
/// and is not re-reported; `sha256` is apotheca's storage digest.
///
/// Both fields are copied straight from the metadata apotheca reports for
/// the entry; nothing is recomputed here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Stat {
    /// Size reported by apotheca for the entry (presumably in bytes —
    /// confirm against apotheca's documentation).
    pub size: u64,
    /// SHA-256 digest reported by apotheca for the entry.
    pub sha256: [u8; 32],
}

/// Failure modes of [`Pool::put`].
#[derive(Debug)]
pub enum PutError {
    /// Two distinct byte sequences produced the same BLAKE3 hash. SPEC §2.1,
    /// §3.3. With a collision-resistant hash this does not occur from honest
    /// inputs. Reported when apotheca's `put` returns its `Collision`
    /// outcome.
    HashCollision,
    /// An error from the underlying apotheca pool, passed through unchanged.
    Apotheca(apotheca::PutError),
}

/// Failure modes of [`Pool::get`].
#[derive(Debug)]
pub enum GetError {
    /// apotheca reported that no entry exists under the requested hash's
    /// name.
    NotFound,
    /// Either apotheca's stored SHA-256 did not match the bytes read, or
    /// `verify_on_read` was enabled and `blake3(bytes)` did not match the
    /// requested hash. SPEC §2.2, §4.
    IntegrityError,
    /// Any other error from the underlying apotheca pool, passed through
    /// unchanged.
    Apotheca(apotheca::GetError),
}

/// Failure modes of [`Pool::stat`].
#[derive(Debug)]
pub enum StatError {
    /// apotheca reported that no entry exists under the requested hash's
    /// name.
    NotFound,
    /// Any other error from the underlying apotheca pool, passed through
    /// unchanged.
    Apotheca(apotheca::StatError),
}

impl std::fmt::Display for PutError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            PutError::HashCollision => f.write_str(
                "blake3 collision: distinct bytes hashed to an existing entry's name",
            ),
            PutError::Apotheca(e) => write!(f, "{e}"),
        }
    }
}

impl std::fmt::Display for GetError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            GetError::NotFound => f.write_str("not found"),
            GetError::IntegrityError => {
                f.write_str("integrity error: stored bytes do not match expected hash")
            }
            GetError::Apotheca(e) => write!(f, "{e}"),
        }
    }
}

impl std::fmt::Display for StatError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            StatError::NotFound => f.write_str("not found"),
            StatError::Apotheca(e) => write!(f, "{e}"),
        }
    }
}

// Marker impls so the three error enums can be used wherever a
// `std::error::Error` is expected. All trait methods keep their defaults:
// wrapped apotheca errors are surfaced through the `Display` impls, which
// forward the inner error's message.
impl std::error::Error for PutError {}
impl std::error::Error for GetError {}
impl std::error::Error for StatError {}

/// A syntheca pool: an apotheca pool whose names are BLAKE3 hashes of their
/// bytes (SPEC §1, §2).
pub struct Pool {
    // The wrapped apotheca pool that every operation delegates to.
    inner: apotheca::Pool,
    // Copied from `Options::verify_on_read` at construction; when set, `get`
    // rehashes the bytes it read and compares against the requested hash.
    verify_on_read: bool,
}

impl Pool {
    /// Open (or create) a pool at the given root with default options.
    pub fn open<P: AsRef<Path>>(root: P) -> io::Result<Self> {
        Self::open_with(root, Options::default())
    }

    /// Open (or create) a pool at the given root with explicit options.
    pub fn open_with<P: AsRef<Path>>(root: P, opts: Options) -> io::Result<Self> {
        let inner = apotheca::Pool::open(root)?;
        Ok(Self { inner, verify_on_read: opts.verify_on_read })
    }

    /// Wrap an already-open apotheca pool. Useful when the caller is already
    /// holding an `apotheca::Pool` for other purposes.
    pub fn from_apotheca(inner: apotheca::Pool, opts: Options) -> Self {
        Self { inner, verify_on_read: opts.verify_on_read }
    }

    pub fn root(&self) -> &Path {
        self.inner.root()
    }

    pub fn verify_on_read(&self) -> bool {
        self.verify_on_read
    }

    /// SPEC §2.1. Compute `blake3(bytes)`, write through apotheca, return
    /// the hash. Idempotent for identical bytes; surfaces apotheca's
    /// `Collision` outcome as [`PutError::HashCollision`].
    pub fn put(&self, bytes: &[u8]) -> Result<Hash, PutError> {
        let hash = Hash::of(bytes);
        let name = hash_name(&hash);
        let name = apotheca::Name::new(&name).expect("64-char hex is a valid apotheca name");
        match self.inner.put(&name, bytes).map_err(PutError::Apotheca)? {
            apotheca::PutOutcome::Ok => Ok(hash),
            apotheca::PutOutcome::Collision => Err(PutError::HashCollision),
        }
    }

    /// SPEC §2.2. Read by hash. apotheca verifies SHA-256; if `verify_on_read`
    /// is enabled, also rehashes under BLAKE3.
    pub fn get(&self, hash: &Hash) -> Result<Vec<u8>, GetError> {
        let name = hash_name(hash);
        let name = apotheca::Name::new(&name).expect("64-char hex is a valid apotheca name");
        let bytes = match self.inner.get(&name) {
            Ok(b) => b,
            Err(apotheca::GetError::NotFound) => return Err(GetError::NotFound),
            Err(apotheca::GetError::IntegrityError) => return Err(GetError::IntegrityError),
            Err(e) => return Err(GetError::Apotheca(e)),
        };
        if self.verify_on_read && Hash::of(&bytes) != *hash {
            return Err(GetError::IntegrityError);
        }
        Ok(bytes)
    }

    /// SPEC §2.3. Returns size and apotheca's SHA-256 storage digest.
    pub fn stat(&self, hash: &Hash) -> Result<Stat, StatError> {
        let name = hash_name(hash);
        let name = apotheca::Name::new(&name).expect("64-char hex is a valid apotheca name");
        match self.inner.stat(&name) {
            Ok(meta) => Ok(Stat { size: meta.size, sha256: meta.sha256 }),
            Err(apotheca::StatError::NotFound) => Err(StatError::NotFound),
            Err(e) => Err(StatError::Apotheca(e)),
        }
    }
}

/// Render `hash` as the byte form of its apotheca name: the hash bytes
/// hex-encoded into a 64-byte ASCII buffer (lowercase digits).
///
/// # Panics
/// If the encoder reports that the hash bytes do not fit the buffer, which
/// is treated as an invariant violation.
fn hash_name(hash: &Hash) -> [u8; 64] {
    let mut encoded = [0_u8; 64];
    let raw = hash.as_bytes();
    hex::encode_to_slice(raw, &mut encoded).expect("32 bytes always fit in 64 hex");
    encoded
}