Skip to main content

nookdb_core/codec/
doc.rs

//! Schema-driven JSON document codec and the `ValueCodec` byte-transform seam.
//!
//! The `ValueCodec` seam sits at the storage write/read boundary
//! (JSON bytes <-> stored bytes). The default is the identity codec (plain
//! JSON on disk). A future codec can be injected here WITHOUT modifying
//! this crate (extension seam §6a). No Pro code lives here.
7use serde_json::Value;
8
9use crate::error::NookError;
10use crate::schema::ir::CollectionIr;
11
12/// Seam for transforming stored values at the storage read/write boundary.
13///
14/// Implement this in an external crate to inject an alternate codec (e.g.
15/// at-rest encryption) without forking or modifying nookdb-core. The default
16/// is [`IdentityCodec`] (pass-through → plain JSON on disk).
17///
18/// # Examples
19///
20/// ```
21/// use nookdb_core::{ValueCodec, IdentityCodec};
22///
23/// // The default codec is a pass-through.
24/// let codec = IdentityCodec;
25/// assert_eq!(codec.encode(b"hi"), b"hi");
26/// assert_eq!(codec.decode(b"hi").unwrap(), b"hi");
27///
28/// // An external crate can implement the seam against the PUBLIC path.
29/// struct Xor7;
30/// impl ValueCodec for Xor7 {
31///     fn encode(&self, v: &[u8]) -> Vec<u8> {
32///         v.iter().map(|b| b ^ 7).collect()
33///     }
34///     fn decode(&self, v: &[u8]) -> Result<Vec<u8>, nookdb_core::NookError> {
35///         Ok(v.iter().map(|b| b ^ 7).collect())
36///     }
37/// }
38/// let x = Xor7;
39/// assert_eq!(x.decode(&x.encode(b"abc")).unwrap(), b"abc");
40/// ```
41pub trait ValueCodec: Send + Sync {
42    fn encode(&self, value: &[u8]) -> Vec<u8>;
43    /// # Errors
44    /// Returns `NookError::Corruption` if stored bytes cannot be decoded.
45    fn decode(&self, stored: &[u8]) -> Result<Vec<u8>, NookError>;
46}
47
48/// The free-tier default: bytes pass through unchanged → plain JSON on disk.
49pub struct IdentityCodec;
50impl ValueCodec for IdentityCodec {
51    fn encode(&self, value: &[u8]) -> Vec<u8> {
52        value.to_vec()
53    }
54    fn decode(&self, stored: &[u8]) -> Result<Vec<u8>, NookError> {
55        Ok(stored.to_vec())
56    }
57}
58
59/// Encodes a validated document to stored bytes via the `ValueCodec` seam.
60///
61/// Serializes `doc` to canonical JSON bytes, then passes them through
62/// `codec.encode`. `_c` is reserved for decode-time coercion of future
63/// non-JSON-native types (unused in M2, where all field types carry their
64/// canonical JSON representation).
65///
66/// # Errors
67/// Returns `NookError::Schema` if `doc` is not serializable.
68pub fn encode_document(
69    _c: &CollectionIr,
70    doc: &Value,
71    codec: &dyn ValueCodec,
72) -> Result<Vec<u8>, NookError> {
73    let json = serde_json::to_vec(doc).map_err(|e| NookError::Schema {
74        msg: format!("cannot serialize document: {e}"),
75    })?;
76    Ok(codec.encode(&json))
77}
78
79/// Decodes stored bytes back to a document value (seam first, then JSON).
80///
81/// # Errors
82/// Returns `NookError::Corruption` if the stored bytes are not the
83/// JSON this codec produced.
84pub fn decode_document(
85    _c: &CollectionIr,
86    stored: &[u8],
87    codec: &dyn ValueCodec,
88) -> Result<Value, NookError> {
89    let json = codec.decode(stored)?;
90    serde_json::from_slice(&json).map_err(|e| NookError::Corruption {
91        msg: format!("corrupt document json: {e}"),
92    })
93}
94
#[cfg(test)]
mod doc_tests {
    use super::*;
    use crate::schema::ir::SchemaIr;
    use serde_json::json;

    /// Compiles the single-collection schema (`u`) shared by the tests below.
    fn ir() -> SchemaIr {
        let source = r#"{"u":{"idField":"id","fields":[
          {"name":"id","type":"id"},{"name":"name","type":"string"},
          {"name":"born","type":"date"}],"indexes":[]}}"#;
        SchemaIr::compile(source).unwrap()
    }

    #[test]
    fn json_round_trips_through_identity_codec() {
        let schema = ir();
        let users = schema.collection("u").unwrap();
        let original = json!({"id":"1","name":"Ali","born":"2026-05-19T00:00:00.000Z"});

        // With the identity codec the stored bytes must themselves be JSON.
        let stored = encode_document(users, &original, &IdentityCodec).unwrap();
        let reparsed = serde_json::from_slice::<serde_json::Value>(&stored);
        assert!(
            reparsed.is_ok(),
            "stored bytes must be valid JSON (debuggability goal)"
        );

        // Decoding must hand back exactly the document that was encoded.
        let decoded = decode_document(users, &stored, &IdentityCodec).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn decode_fails_corruption_on_garbage() {
        let schema = ir();
        let users = schema.collection("u").unwrap();

        // Two 0xFF bytes are not valid JSON, so decoding must report corruption.
        let garbage: &[u8] = b"\xff\xff";
        let err = decode_document(users, garbage, &IdentityCodec).unwrap_err();
        assert_eq!(err.kind(), crate::error::NookErrorKind::Corruption);
    }
}
132
#[cfg(test)]
mod seam_tests {
    use super::*;

    #[test]
    fn default_codec_is_identity_and_selected_via_seam() {
        // Go through a trait object so the call is dispatched via the seam.
        let seam: &dyn ValueCodec = &IdentityCodec;
        let plain = b"{\"a\":1}";

        let on_disk = seam.encode(plain);
        assert_eq!(on_disk, plain);

        let restored = seam.decode(&on_disk).unwrap();
        assert_eq!(restored, plain);
    }

    /// Returns the bytes of `bytes` in reverse order.
    fn reversed(bytes: &[u8]) -> Vec<u8> {
        let mut flipped = bytes.to_vec();
        flipped.reverse();
        flipped
    }

    // Test-only stub showing that the seam accepts an alternate codec.
    // NOT Pro code — it exists in this test module only.
    struct ReverseCodec;

    impl ValueCodec for ReverseCodec {
        fn encode(&self, v: &[u8]) -> Vec<u8> {
            reversed(v)
        }

        fn decode(&self, v: &[u8]) -> Result<Vec<u8>, crate::error::NookError> {
            Ok(reversed(v))
        }
    }

    #[test]
    fn alternate_codec_swaps_in_through_public_seam_api() {
        let seam: &dyn ValueCodec = &ReverseCodec;
        let plain = b"abc";

        // Encoding reverses the bytes; decoding reverses them back.
        let on_disk = seam.encode(plain);
        assert_eq!(on_disk, b"cba");

        let restored = seam.decode(&on_disk).unwrap();
        assert_eq!(restored, plain);
    }
}