Skip to main content

content_cas/
lib.rs

1//! # content-cas
2//!
3//! Content-addressed cache. Store bytes under their SHA-256 hex, retrieve
4//! by hex. On-disk layout is `root/aa/bbbb...` (first 2 hex chars become
5//! a subdirectory to keep filesystem ls fast even at millions of keys).
6//!
7//! Reasonable for embeddings, model responses, tokenizer outputs — any
8//! large, immutable, deterministic blob whose key is its content.
9//!
10//! ## Example
11//!
12//! ```no_run
13//! use content_cas::Cas;
14//! let cas = Cas::new("/tmp/my-cas").unwrap();
15//! let hash = cas.put(b"hello world").unwrap();
16//! assert_eq!(hash.len(), 64);
17//! let bytes = cas.get(&hash).unwrap().unwrap();
18//! assert_eq!(bytes, b"hello world");
19//! ```
20
21#![deny(missing_docs)]
22
23mod sha256;
24
25use std::fs;
26use std::io;
27use std::path::{Path, PathBuf};
28
/// A content-addressed blob store kept in a directory on disk.
///
/// The only state held is the root directory path, so cloning a `Cas`
/// yields another handle onto the same on-disk store.
#[derive(Clone, Debug)]
pub struct Cas {
    // Directory under which every entry is stored.
    root: PathBuf,
}
34
35impl Cas {
36    /// Create or open a CAS rooted at `root`.
37    pub fn new(root: impl AsRef<Path>) -> io::Result<Self> {
38        let root = root.as_ref().to_path_buf();
39        fs::create_dir_all(&root)?;
40        Ok(Self { root })
41    }
42
43    /// Compute the SHA-256 of `bytes` and store. Returns the 64-char hex
44    /// key. Re-writing the same key is a no-op.
45    pub fn put(&self, bytes: &[u8]) -> io::Result<String> {
46        let hash = sha256::hex(bytes);
47        let p = self.path_for(&hash);
48        if !p.exists() {
49            if let Some(parent) = p.parent() {
50                fs::create_dir_all(parent)?;
51            }
52            // Atomic write: write to .tmp, then rename.
53            let tmp = p.with_extension("tmp");
54            fs::write(&tmp, bytes)?;
55            fs::rename(&tmp, &p)?;
56        }
57        Ok(hash)
58    }
59
60    /// Retrieve the bytes for a hex key. Returns `Ok(None)` if absent.
61    pub fn get(&self, hash: &str) -> io::Result<Option<Vec<u8>>> {
62        let p = self.path_for(hash);
63        match fs::read(&p) {
64            Ok(b) => Ok(Some(b)),
65            Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(None),
66            Err(e) => Err(e),
67        }
68    }
69
70    /// True when `hash` is present.
71    pub fn contains(&self, hash: &str) -> bool {
72        self.path_for(hash).exists()
73    }
74
75    /// Delete the entry for `hash`. Returns `Ok(false)` if absent.
76    pub fn remove(&self, hash: &str) -> io::Result<bool> {
77        let p = self.path_for(hash);
78        match fs::remove_file(&p) {
79            Ok(()) => Ok(true),
80            Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(false),
81            Err(e) => Err(e),
82        }
83    }
84
85    /// Compute the path where `hash` is or would be stored.
86    pub fn path_for(&self, hash: &str) -> PathBuf {
87        let (prefix, rest) = hash.split_at(2);
88        self.root.join(prefix).join(rest)
89    }
90}