Skip to main content

cyphr_storage/
lib.rs

1//! # Cyphr Storage
2//!
3//! Storage backends for the Cyphr identity protocol.
4//!
5//! This crate provides a backend-agnostic storage API for persisting
6//! Cyphr principals, cozies, and actions. The core `Store` trait
7//! defines the minimal interface that any storage backend must implement.
8//!
9//! ## Design Principles
10//!
11//! - **Storage is dumb**: The storage layer only handles bytes. All semantic
12//!   operations (verification, state computation) are handled by `cyphr`.
13//! - **Immutable history**: Entries are append-only; past entries are never modified.
14//! - **Order via `pre` chain**: Canonical order is derived from coz `pre`
15//!   field chaining, not storage order.
16//! - **Bit-perfect preservation**: Entries store original JSON bytes to ensure
17//!   correct `czd` computation. See `Entry` for details.
18//!
19//! ## Included Backends
20//!
21//! - [`FileStore`]: File-based storage using JSONL format (one file per principal).
22
23#![forbid(unsafe_code)]
24
25mod export;
26mod file;
27mod import;
28
29pub use export::{ExportError, PersistError, export_commits, export_entries, persist_entries};
30pub use file::{FileStore, FileStoreError};
31pub use import::{
32    Checkpoint, Genesis, LoadError, load_from_checkpoint, load_principal,
33    load_principal_from_commits,
34};
35
36use cyphr::state::PrincipalGenesis;
37use serde_json::value::RawValue;
38
39/// Storage backend trait.
40///
41/// Implementations provide persistence for signed Cyphr entries
42/// (cozies and actions). The trait is intentionally minimal:
43/// storage backends need only handle append and retrieval operations.
44pub trait Store {
45    /// The error type for this store implementation.
46    type Error: std::error::Error + Send + Sync + 'static;
47
48    /// Append a signed entry to the log.
49    fn append_entry(&self, pr: &PrincipalGenesis, entry: &Entry) -> Result<(), Self::Error>;
50
51    /// Retrieve all entries for a principal.
52    fn get_entries(&self, pr: &PrincipalGenesis) -> Result<Vec<Entry>, Self::Error>;
53
54    /// Retrieve entries with filtering (supports coz patches).
55    fn get_entries_range(
56        &self,
57        pr: &PrincipalGenesis,
58        opts: &QueryOpts,
59    ) -> Result<Vec<Entry>, Self::Error>;
60
61    /// Check if principal exists in storage.
62    fn exists(&self, pr: &PrincipalGenesis) -> Result<bool, Self::Error>;
63}
64
/// Query options for filtered retrieval.
///
/// All fields are optional and `QueryOpts::default()` leaves every one of
/// them as `None`. The type compares by value (`PartialEq`/`Eq`), so option
/// sets can be checked with `==` / `assert_eq!`.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct QueryOpts {
    /// Only include entries with `now` > this timestamp.
    pub after: Option<i64>,
    /// Only include entries with `now` < this timestamp.
    pub before: Option<i64>,
    /// Maximum number of entries to return.
    pub limit: Option<usize>,
}
75
76/// A stored entry preserving bit-perfect JSON bytes.
77///
78/// **CRITICAL INVARIANT**: The original JSON string is preserved exactly as received.
79/// This ensures correct `czd` computation, which hashes the exact bytes of `pay`.
80///
81/// ## The Re-serialization Trap
82///
83/// A naive approach would parse JSON into `serde_json::Value`, then re-serialize
84/// for `czd` computation. This breaks signatures because re-serialization can change:
85/// - Field ordering
86/// - Whitespace
87/// - Number representation (e.g., `1.0` → `1`)
88///
89/// By storing `Box<RawValue>`, we preserve the original bytes and extract `pay`
90/// from the same source, ensuring bit-perfect fidelity.
91#[derive(Debug, Clone)]
92pub struct Entry {
93    /// The raw JSON string (bit-perfect, used for czd computation).
94    raw_json: Box<RawValue>,
95    /// The `now` timestamp extracted from pay.now (for filtering).
96    pub now: i64,
97}
98
99impl Entry {
100    /// Create an entry from a raw JSON string.
101    ///
102    /// This is the primary constructor for entries loaded from storage.
103    /// The original bytes are preserved exactly.
104    ///
105    /// # Errors
106    ///
107    /// Returns `EntryError::InvalidJson` if the string is not valid JSON.
108    /// Returns `EntryError::MissingNow` if `pay.now` is missing or not an integer.
109    pub fn from_json(json: String) -> Result<Self, EntryError> {
110        // Validate and convert to RawValue
111        let raw_json: Box<RawValue> =
112            serde_json::from_str(&json).map_err(|_| EntryError::InvalidJson)?;
113
114        // Extract timestamp for filtering
115        let now = Self::extract_now(&json)?;
116
117        Ok(Self { raw_json, now })
118    }
119
120    /// Create an entry from an owned RawValue.
121    ///
122    /// Useful when deserializing from a format that already provides RawValue.
123    pub fn from_raw_value(raw: Box<RawValue>) -> Result<Self, EntryError> {
124        let now = Self::extract_now(raw.get())?;
125        Ok(Self { raw_json: raw, now })
126    }
127
128    /// Create an entry from a serde_json::Value.
129    ///
130    /// **Warning**: This serializes the Value, which may not preserve original
131    /// byte ordering. Use only when creating new entries (e.g., during export),
132    /// not when loading from storage.
133    pub fn from_value(value: &serde_json::Value) -> Result<Self, EntryError> {
134        let json = serde_json::to_string(value).map_err(|_| EntryError::InvalidJson)?;
135        Self::from_json(json)
136    }
137
138    /// Get the raw JSON string.
139    ///
140    /// This returns the exact bytes stored, suitable for I/O operations.
141    pub fn raw_json(&self) -> &str {
142        self.raw_json.get()
143    }
144
145    /// Parse the entry as a serde_json::Value.
146    ///
147    /// Use this for field access (e.g., extracting `typ`, `key`).
148    /// **Do NOT use the resulting Value for czd computation** - use `pay_bytes()` instead.
149    pub fn as_value(&self) -> Result<serde_json::Value, EntryError> {
150        serde_json::from_str(self.raw_json.get()).map_err(|_| EntryError::InvalidJson)
151    }
152
153    /// Extract the `pay` field as raw bytes, preserving exact byte sequence.
154    ///
155    /// This is the critical method for `czd` computation. It extracts the `pay`
156    /// field from the original JSON, preserving exact bytes including whitespace
157    /// and field ordering.
158    ///
159    /// # Implementation Note
160    ///
161    /// We parse the raw JSON into a structure with RawValue for the pay field,
162    /// then return those bytes. This ensures we're extracting from the preserved
163    /// original, not re-serializing.
164    pub fn pay_bytes(&self) -> Result<Vec<u8>, EntryError> {
165        // Parse with pay as RawValue to preserve its exact bytes
166        #[derive(serde::Deserialize)]
167        struct PayExtractor<'a> {
168            #[serde(borrow)]
169            pay: &'a RawValue,
170        }
171
172        let extractor: PayExtractor =
173            serde_json::from_str(self.raw_json.get()).map_err(|_| EntryError::MissingPay)?;
174
175        Ok(extractor.pay.get().as_bytes().to_vec())
176    }
177
178    /// Extract `pay.now` timestamp from JSON string.
179    fn extract_now(json: &str) -> Result<i64, EntryError> {
180        #[derive(serde::Deserialize)]
181        struct PayNow {
182            now: i64,
183        }
184        #[derive(serde::Deserialize)]
185        struct NowExtractor {
186            pay: PayNow,
187        }
188
189        let extractor: NowExtractor =
190            serde_json::from_str(json).map_err(|_| EntryError::MissingNow)?;
191        Ok(extractor.pay.now)
192    }
193}
194
195/// Key material for a commit (SPEC §6).
196///
197/// Keys are stored at commit level per SPEC §5.2/§5.3, not embedded
198/// per-coz. Optional fields (`tag`, `now`) are semantically
199/// optional — not all keys have human-readable labels or creation
200/// timestamps at export time.
201#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
202pub struct KeyEntry {
203    /// Algorithm identifier (e.g., "ES256").
204    pub alg: String,
205    /// Public key (base64url-encoded).
206    #[serde(rename = "pub")]
207    pub pub_key: String,
208    /// Key thumbprint (base64url-encoded).
209    pub tmb: String,
210    /// Optional human-readable label.
211    #[serde(skip_serializing_if = "Option::is_none")]
212    pub tag: Option<String>,
213    /// Optional creation timestamp (maps to Key.first_seen).
214    #[serde(skip_serializing_if = "Option::is_none")]
215    pub now: Option<i64>,
216}
217
218/// A stored commit bundle for the commit-based JSONL format.
219///
220/// Each line in the JSONL file represents one finalized commit containing:
221/// - `cozies`: Array of coz entries (signed coz messages)
222/// - `keys`: Key material introduced in this commit (SPEC §5.2/§5.3)
223/// - `commit_id`: Commit ID (Merkle root of commit's coz czds)
224/// - `ar`: Auth Root (derived from KR)
225/// - `sr`: State Root (derived from AR and DR?)
226/// - `pr`: Principal Root (derived from SR and CR)
227///
228/// The derived state digests enable efficient indexing and verification
229/// without replaying the full coz history.
230#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
231pub struct CommitEntry {
232    /// ParsedCoz entries in this commit bundle.
233    #[serde(rename = "txs")]
234    pub cozies: Vec<serde_json::Value>,
235    /// Key material introduced in this commit.
236    pub keys: Vec<KeyEntry>,
237    /// Commit ID (per-commit Merkle root of coz czds).
238    #[serde(alias = "ts")]
239    pub commit_id: String,
240    /// Auth Root after this commit.
241    #[serde(rename = "ar")]
242    pub auth_root: String,
243    /// State Root: MR(AR, DR?).
244    #[serde(alias = "cs")]
245    pub sr: String,
246    /// Principal Root after this commit.
247    pub pr: String,
248}
249
250impl CommitEntry {
251    /// Create a new commit entry from components.
252    pub fn new(
253        cozies: Vec<serde_json::Value>,
254        keys: Vec<KeyEntry>,
255        commit_id: String,
256        auth_root: String,
257        sr: String,
258        pr: String,
259    ) -> Self {
260        Self {
261            cozies,
262            keys,
263            commit_id,
264            auth_root,
265            sr,
266            pr,
267        }
268    }
269
270    /// Get the raw JSON string for this commit entry.
271    pub fn to_json(&self) -> Result<String, serde_json::Error> {
272        serde_json::to_string(self)
273    }
274
275    /// Parse a commit entry from a JSON string.
276    pub fn from_json(json: &str) -> Result<Self, serde_json::Error> {
277        serde_json::from_str(json)
278    }
279}
280
281/// Errors that can occur when working with entries.
282#[derive(Debug, thiserror::Error)]
283pub enum EntryError {
284    /// Entry is not valid JSON.
285    #[error("invalid JSON")]
286    InvalidJson,
287    /// Entry is missing the required `pay.now` field.
288    #[error("entry missing pay.now field")]
289    MissingNow,
290    /// Entry is missing the required `pay` field.
291    #[error("entry missing pay field")]
292    MissingPay,
293}
294
#[cfg(test)]
mod tests {
    use super::*;

    /// Compact, key-sorted sample entry shared by the basic tests.
    const SAMPLE: &str = r#"{"pay":{"now":12345,"typ":"test"},"sig":"AAAA"}"#;

    #[test]
    fn entry_from_json_extracts_now() {
        let entry = Entry::from_json(SAMPLE.to_string()).unwrap();
        assert_eq!(entry.now, 12345);
    }

    #[test]
    fn entry_raw_json_preserves_bytes() {
        let entry = Entry::from_json(SAMPLE.to_string()).unwrap();
        assert_eq!(entry.raw_json(), SAMPLE);
    }

    #[test]
    fn entry_pay_bytes_extracts_exact_bytes() {
        let entry = Entry::from_json(SAMPLE.to_string()).unwrap();
        let pay_bytes = entry.pay_bytes().unwrap();
        assert_eq!(
            String::from_utf8(pay_bytes).unwrap(),
            r#"{"now":12345,"typ":"test"}"#
        );
    }

    /// Guards the bit-perfect invariant with input that a parse-and-
    /// re-serialize round trip would NOT reproduce: unsorted keys, irregular
    /// whitespace, and a `1.0` literal. The compact sample above cannot catch
    /// such a regression because re-serializing it gives the same bytes.
    #[test]
    fn entry_pay_bytes_preserves_non_canonical_formatting() {
        let json = r#"{"sig":"AAAA", "pay": {"typ":"test",  "amt":1.0, "now": 12345 }}"#;
        let entry = Entry::from_json(json.to_string()).unwrap();
        assert_eq!(entry.now, 12345);
        assert_eq!(entry.raw_json(), json);
        assert_eq!(
            String::from_utf8(entry.pay_bytes().unwrap()).unwrap(),
            r#"{"typ":"test",  "amt":1.0, "now": 12345 }"#
        );
    }

    #[test]
    fn entry_from_raw_value_matches_from_json() {
        let raw: Box<serde_json::value::RawValue> = serde_json::from_str(SAMPLE).unwrap();
        let entry = Entry::from_raw_value(raw).unwrap();
        assert_eq!(entry.now, 12345);
        assert_eq!(entry.raw_json(), SAMPLE);
    }

    #[test]
    fn entry_missing_now_fails() {
        let json = r#"{"pay":{"typ":"test"},"sig":"AAAA"}"#.to_string();
        let result = Entry::from_json(json);
        assert!(matches!(result, Err(EntryError::MissingNow)));
    }

    #[test]
    fn entry_missing_pay_fails() {
        // Construction fails while extracting `pay.now`, and that step maps
        // every failure to `MissingNow`; `MissingPay` only comes from
        // `pay_bytes()`.
        let json = r#"{"sig":"AAAA"}"#.to_string();
        let result = Entry::from_json(json);
        assert!(matches!(result, Err(EntryError::MissingNow)));
    }

    #[test]
    fn entry_invalid_json_fails() {
        let json = "not json".to_string();
        let result = Entry::from_json(json);
        assert!(matches!(result, Err(EntryError::InvalidJson)));
    }
}
344}