content_cas/lib.rs
1//! # content-cas
2//!
3//! Content-addressed cache. Store bytes under their SHA-256 hex, retrieve
4//! by hex. On-disk layout is `root/aa/bbbb...` (first 2 hex chars become
5//! a subdirectory to keep filesystem ls fast even at millions of keys).
6//!
7//! Reasonable for embeddings, model responses, tokenizer outputs — any
8//! large, immutable, deterministic blob whose key is its content.
9//!
10//! ## Example
11//!
12//! ```no_run
13//! use content_cas::Cas;
14//! let cas = Cas::new("/tmp/my-cas").unwrap();
15//! let hash = cas.put(b"hello world").unwrap();
16//! assert_eq!(hash.len(), 64);
17//! let bytes = cas.get(&hash).unwrap().unwrap();
18//! assert_eq!(bytes, b"hello world");
19//! ```
20
21#![deny(missing_docs)]
22
23mod sha256;
24
25use std::fs;
26use std::io;
27use std::path::{Path, PathBuf};
28
/// Handle to a content-addressed cache stored on disk.
///
/// The handle only records where the cache lives, so cloning it is cheap
/// and every clone refers to the same directory tree.
#[derive(Clone, Debug)]
pub struct Cas {
    /// Directory under which all entries are stored.
    root: PathBuf,
}
34
35impl Cas {
36 /// Create or open a CAS rooted at `root`.
37 pub fn new(root: impl AsRef<Path>) -> io::Result<Self> {
38 let root = root.as_ref().to_path_buf();
39 fs::create_dir_all(&root)?;
40 Ok(Self { root })
41 }
42
43 /// Compute the SHA-256 of `bytes` and store. Returns the 64-char hex
44 /// key. Re-writing the same key is a no-op.
45 pub fn put(&self, bytes: &[u8]) -> io::Result<String> {
46 let hash = sha256::hex(bytes);
47 let p = self.path_for(&hash);
48 if !p.exists() {
49 if let Some(parent) = p.parent() {
50 fs::create_dir_all(parent)?;
51 }
52 // Atomic write: write to .tmp, then rename.
53 let tmp = p.with_extension("tmp");
54 fs::write(&tmp, bytes)?;
55 fs::rename(&tmp, &p)?;
56 }
57 Ok(hash)
58 }
59
60 /// Retrieve the bytes for a hex key. Returns `Ok(None)` if absent.
61 pub fn get(&self, hash: &str) -> io::Result<Option<Vec<u8>>> {
62 let p = self.path_for(hash);
63 match fs::read(&p) {
64 Ok(b) => Ok(Some(b)),
65 Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(None),
66 Err(e) => Err(e),
67 }
68 }
69
70 /// True when `hash` is present.
71 pub fn contains(&self, hash: &str) -> bool {
72 self.path_for(hash).exists()
73 }
74
75 /// Delete the entry for `hash`. Returns `Ok(false)` if absent.
76 pub fn remove(&self, hash: &str) -> io::Result<bool> {
77 let p = self.path_for(hash);
78 match fs::remove_file(&p) {
79 Ok(()) => Ok(true),
80 Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(false),
81 Err(e) => Err(e),
82 }
83 }
84
85 /// Compute the path where `hash` is or would be stored.
86 pub fn path_for(&self, hash: &str) -> PathBuf {
87 let (prefix, rest) = hash.split_at(2);
88 self.root.join(prefix).join(rest)
89 }
90}