cyphr_storage/lib.rs
1//! # Cyphr Storage
2//!
3//! Storage backends for the Cyphr identity protocol.
4//!
5//! This crate provides a backend-agnostic storage API for persisting
6//! Cyphr principals, cozies, and actions. The core `Store` trait
7//! defines the minimal interface that any storage backend must implement.
8//!
9//! ## Design Principles
10//!
11//! - **Storage is dumb**: The storage layer only handles bytes. All semantic
12//! operations (verification, state computation) are handled by `cyphr`.
13//! - **Immutable history**: Entries are append-only; past entries are never modified.
14//! - **Order via `pre` chain**: Canonical order is derived from coz `pre`
15//! field chaining, not storage order.
16//! - **Bit-perfect preservation**: Entries store original JSON bytes to ensure
17//! correct `czd` computation. See `Entry` for details.
18//!
19//! ## Included Backends
20//!
21//! - [`FileStore`]: File-based storage using JSONL format (one file per principal).
22
23#![forbid(unsafe_code)]
24
25mod export;
26mod file;
27mod import;
28
29pub use export::{ExportError, PersistError, export_commits, export_entries, persist_entries};
30pub use file::{FileStore, FileStoreError};
31pub use import::{
32 Checkpoint, Genesis, LoadError, load_from_checkpoint, load_principal,
33 load_principal_from_commits,
34};
35
36use cyphr::state::PrincipalGenesis;
37use serde_json::value::RawValue;
38
39/// Storage backend trait.
40///
41/// Implementations provide persistence for signed Cyphr entries
42/// (cozies and actions). The trait is intentionally minimal:
43/// storage backends need only handle append and retrieval operations.
44pub trait Store {
45 /// The error type for this store implementation.
46 type Error: std::error::Error + Send + Sync + 'static;
47
48 /// Append a signed entry to the log.
49 fn append_entry(&self, pr: &PrincipalGenesis, entry: &Entry) -> Result<(), Self::Error>;
50
51 /// Retrieve all entries for a principal.
52 fn get_entries(&self, pr: &PrincipalGenesis) -> Result<Vec<Entry>, Self::Error>;
53
54 /// Retrieve entries with filtering (supports coz patches).
55 fn get_entries_range(
56 &self,
57 pr: &PrincipalGenesis,
58 opts: &QueryOpts,
59 ) -> Result<Vec<Entry>, Self::Error>;
60
61 /// Check if principal exists in storage.
62 fn exists(&self, pr: &PrincipalGenesis) -> Result<bool, Self::Error>;
63}
64
/// Query options for filtered retrieval.
///
/// Every field is optional; `QueryOpts::default()` leaves all of them `None`.
/// How each bound is applied is up to the [`Store`] implementation that
/// receives the options.
///
/// The type is comparable (`PartialEq`/`Eq`) so callers and tests can check
/// which options were requested.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct QueryOpts {
    /// Only include entries with `now` > this timestamp.
    pub after: Option<i64>,
    /// Only include entries with `now` < this timestamp.
    pub before: Option<i64>,
    /// Maximum number of entries to return.
    pub limit: Option<usize>,
}
75
76/// A stored entry preserving bit-perfect JSON bytes.
77///
78/// **CRITICAL INVARIANT**: The original JSON string is preserved exactly as received.
79/// This ensures correct `czd` computation, which hashes the exact bytes of `pay`.
80///
81/// ## The Re-serialization Trap
82///
83/// A naive approach would parse JSON into `serde_json::Value`, then re-serialize
84/// for `czd` computation. This breaks signatures because re-serialization can change:
85/// - Field ordering
86/// - Whitespace
87/// - Number representation (e.g., `1.0` → `1`)
88///
89/// By storing `Box<RawValue>`, we preserve the original bytes and extract `pay`
90/// from the same source, ensuring bit-perfect fidelity.
91#[derive(Debug, Clone)]
92pub struct Entry {
93 /// The raw JSON string (bit-perfect, used for czd computation).
94 raw_json: Box<RawValue>,
95 /// The `now` timestamp extracted from pay.now (for filtering).
96 pub now: i64,
97}
98
99impl Entry {
100 /// Create an entry from a raw JSON string.
101 ///
102 /// This is the primary constructor for entries loaded from storage.
103 /// The original bytes are preserved exactly.
104 ///
105 /// # Errors
106 ///
107 /// Returns `EntryError::InvalidJson` if the string is not valid JSON.
108 /// Returns `EntryError::MissingNow` if `pay.now` is missing or not an integer.
109 pub fn from_json(json: String) -> Result<Self, EntryError> {
110 // Validate and convert to RawValue
111 let raw_json: Box<RawValue> =
112 serde_json::from_str(&json).map_err(|_| EntryError::InvalidJson)?;
113
114 // Extract timestamp for filtering
115 let now = Self::extract_now(&json)?;
116
117 Ok(Self { raw_json, now })
118 }
119
120 /// Create an entry from an owned RawValue.
121 ///
122 /// Useful when deserializing from a format that already provides RawValue.
123 pub fn from_raw_value(raw: Box<RawValue>) -> Result<Self, EntryError> {
124 let now = Self::extract_now(raw.get())?;
125 Ok(Self { raw_json: raw, now })
126 }
127
128 /// Create an entry from a serde_json::Value.
129 ///
130 /// **Warning**: This serializes the Value, which may not preserve original
131 /// byte ordering. Use only when creating new entries (e.g., during export),
132 /// not when loading from storage.
133 pub fn from_value(value: &serde_json::Value) -> Result<Self, EntryError> {
134 let json = serde_json::to_string(value).map_err(|_| EntryError::InvalidJson)?;
135 Self::from_json(json)
136 }
137
138 /// Get the raw JSON string.
139 ///
140 /// This returns the exact bytes stored, suitable for I/O operations.
141 pub fn raw_json(&self) -> &str {
142 self.raw_json.get()
143 }
144
145 /// Parse the entry as a serde_json::Value.
146 ///
147 /// Use this for field access (e.g., extracting `typ`, `key`).
148 /// **Do NOT use the resulting Value for czd computation** - use `pay_bytes()` instead.
149 pub fn as_value(&self) -> Result<serde_json::Value, EntryError> {
150 serde_json::from_str(self.raw_json.get()).map_err(|_| EntryError::InvalidJson)
151 }
152
153 /// Extract the `pay` field as raw bytes, preserving exact byte sequence.
154 ///
155 /// This is the critical method for `czd` computation. It extracts the `pay`
156 /// field from the original JSON, preserving exact bytes including whitespace
157 /// and field ordering.
158 ///
159 /// # Implementation Note
160 ///
161 /// We parse the raw JSON into a structure with RawValue for the pay field,
162 /// then return those bytes. This ensures we're extracting from the preserved
163 /// original, not re-serializing.
164 pub fn pay_bytes(&self) -> Result<Vec<u8>, EntryError> {
165 // Parse with pay as RawValue to preserve its exact bytes
166 #[derive(serde::Deserialize)]
167 struct PayExtractor<'a> {
168 #[serde(borrow)]
169 pay: &'a RawValue,
170 }
171
172 let extractor: PayExtractor =
173 serde_json::from_str(self.raw_json.get()).map_err(|_| EntryError::MissingPay)?;
174
175 Ok(extractor.pay.get().as_bytes().to_vec())
176 }
177
178 /// Extract `pay.now` timestamp from JSON string.
179 fn extract_now(json: &str) -> Result<i64, EntryError> {
180 #[derive(serde::Deserialize)]
181 struct PayNow {
182 now: i64,
183 }
184 #[derive(serde::Deserialize)]
185 struct NowExtractor {
186 pay: PayNow,
187 }
188
189 let extractor: NowExtractor =
190 serde_json::from_str(json).map_err(|_| EntryError::MissingNow)?;
191 Ok(extractor.pay.now)
192 }
193}
194
195/// Key material for a commit (SPEC §6).
196///
197/// Keys are stored at commit level per SPEC §5.2/§5.3, not embedded
198/// per-coz. Optional fields (`tag`, `now`) are semantically
199/// optional — not all keys have human-readable labels or creation
200/// timestamps at export time.
201#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
202pub struct KeyEntry {
203 /// Algorithm identifier (e.g., "ES256").
204 pub alg: String,
205 /// Public key (base64url-encoded).
206 #[serde(rename = "pub")]
207 pub pub_key: String,
208 /// Key thumbprint (base64url-encoded).
209 pub tmb: String,
210 /// Optional human-readable label.
211 #[serde(skip_serializing_if = "Option::is_none")]
212 pub tag: Option<String>,
213 /// Optional creation timestamp (maps to Key.first_seen).
214 #[serde(skip_serializing_if = "Option::is_none")]
215 pub now: Option<i64>,
216}
217
218/// A stored commit bundle for the commit-based JSONL format.
219///
220/// Each line in the JSONL file represents one finalized commit containing:
221/// - `cozies`: Array of coz entries (signed coz messages)
222/// - `keys`: Key material introduced in this commit (SPEC §5.2/§5.3)
223/// - `commit_id`: Commit ID (Merkle root of commit's coz czds)
224/// - `ar`: Auth Root (derived from KR)
225/// - `sr`: State Root (derived from AR and DR?)
226/// - `pr`: Principal Root (derived from SR and CR)
227///
228/// The derived state digests enable efficient indexing and verification
229/// without replaying the full coz history.
230#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
231pub struct CommitEntry {
232 /// ParsedCoz entries in this commit bundle.
233 #[serde(rename = "txs")]
234 pub cozies: Vec<serde_json::Value>,
235 /// Key material introduced in this commit.
236 pub keys: Vec<KeyEntry>,
237 /// Commit ID (per-commit Merkle root of coz czds).
238 #[serde(alias = "ts")]
239 pub commit_id: String,
240 /// Auth Root after this commit.
241 #[serde(rename = "ar")]
242 pub auth_root: String,
243 /// State Root: MR(AR, DR?).
244 #[serde(alias = "cs")]
245 pub sr: String,
246 /// Principal Root after this commit.
247 pub pr: String,
248}
249
250impl CommitEntry {
251 /// Create a new commit entry from components.
252 pub fn new(
253 cozies: Vec<serde_json::Value>,
254 keys: Vec<KeyEntry>,
255 commit_id: String,
256 auth_root: String,
257 sr: String,
258 pr: String,
259 ) -> Self {
260 Self {
261 cozies,
262 keys,
263 commit_id,
264 auth_root,
265 sr,
266 pr,
267 }
268 }
269
270 /// Get the raw JSON string for this commit entry.
271 pub fn to_json(&self) -> Result<String, serde_json::Error> {
272 serde_json::to_string(self)
273 }
274
275 /// Parse a commit entry from a JSON string.
276 pub fn from_json(json: &str) -> Result<Self, serde_json::Error> {
277 serde_json::from_str(json)
278 }
279}
280
281/// Errors that can occur when working with entries.
282#[derive(Debug, thiserror::Error)]
283pub enum EntryError {
284 /// Entry is not valid JSON.
285 #[error("invalid JSON")]
286 InvalidJson,
287 /// Entry is missing the required `pay.now` field.
288 #[error("entry missing pay.now field")]
289 MissingNow,
290 /// Entry is missing the required `pay` field.
291 #[error("entry missing pay field")]
292 MissingPay,
293}
294
#[cfg(test)]
mod tests {
    use super::*;

    /// Compact, well-formed entry shared by the basic tests.
    const SAMPLE: &str = r#"{"pay":{"now":12345,"typ":"test"},"sig":"AAAA"}"#;

    #[test]
    fn entry_from_json_extracts_now() {
        let entry = Entry::from_json(SAMPLE.to_string()).unwrap();
        assert_eq!(entry.now, 12345);
    }

    #[test]
    fn entry_raw_json_preserves_bytes() {
        let entry = Entry::from_json(SAMPLE.to_string()).unwrap();
        assert_eq!(entry.raw_json(), SAMPLE);
    }

    #[test]
    fn entry_pay_bytes_extracts_exact_bytes() {
        let entry = Entry::from_json(SAMPLE.to_string()).unwrap();
        let pay_bytes = entry.pay_bytes().unwrap();
        assert_eq!(
            String::from_utf8(pay_bytes).unwrap(),
            r#"{"now":12345,"typ":"test"}"#
        );
    }

    /// Guards the bit-perfect invariant with input that a parse/re-serialize
    /// round trip would alter: non-alphabetical key order and irregular
    /// whitespace inside `pay`.
    #[test]
    fn entry_pay_bytes_preserves_whitespace_and_key_order() {
        let json = r#"{"sig":"AAAA","pay":{"typ":"test",  "now": 12345 }}"#.to_string();
        let entry = Entry::from_json(json.clone()).unwrap();
        assert_eq!(entry.now, 12345);
        assert_eq!(entry.raw_json(), json);
        assert_eq!(
            String::from_utf8(entry.pay_bytes().unwrap()).unwrap(),
            r#"{"typ":"test",  "now": 12345 }"#
        );
    }

    #[test]
    fn entry_from_raw_value_matches_from_json() {
        let raw = serde_json::value::RawValue::from_string(SAMPLE.to_string()).unwrap();
        let entry = Entry::from_raw_value(raw).unwrap();
        assert_eq!(entry.now, 12345);
        assert_eq!(entry.raw_json(), SAMPLE);
    }

    #[test]
    fn entry_from_value_round_trips_through_as_value() {
        let value: serde_json::Value = serde_json::from_str(SAMPLE).unwrap();
        let entry = Entry::from_value(&value).unwrap();
        assert_eq!(entry.now, 12345);
        assert_eq!(entry.as_value().unwrap(), value);
    }

    #[test]
    fn entry_missing_now_fails() {
        let json = r#"{"pay":{"typ":"test"},"sig":"AAAA"}"#.to_string();
        let result = Entry::from_json(json);
        assert!(matches!(result, Err(EntryError::MissingNow)));
    }

    #[test]
    fn entry_non_integer_now_fails() {
        let json = r#"{"pay":{"now":"soon"},"sig":"AAAA"}"#.to_string();
        let result = Entry::from_json(json);
        assert!(matches!(result, Err(EntryError::MissingNow)));
    }

    #[test]
    fn entry_missing_pay_fails() {
        // Construction reads `pay.now`, so an absent `pay` is reported as
        // `MissingNow`; `MissingPay` is only produced by `pay_bytes`.
        let json = r#"{"sig":"AAAA"}"#.to_string();
        let result = Entry::from_json(json);
        assert!(matches!(result, Err(EntryError::MissingNow)));
    }

    #[test]
    fn entry_invalid_json_fails() {
        let json = "not json".to_string();
        let result = Entry::from_json(json);
        assert!(matches!(result, Err(EntryError::InvalidJson)));
    }
}