Skip to main content

nookdb_core/schema/
validate.rs

1//! Authoritative document validation against a `CollectionIr`.
2use serde_json::Value;
3
4use crate::error::NookError;
5use crate::schema::ir::{CollectionIr, FieldIr, FieldType};
6
7/// Validates `doc` against `c`. The Rust core is the sole authority
8/// (PRD §3); TS has already applied id/defaults before this runs.
9///
10/// # Errors
11/// Returns `NookError::Schema` on any type mismatch, constraint
12/// violation, unknown field, or missing required field.
13pub fn validate_document(c: &CollectionIr, doc: &Value) -> Result<(), NookError> {
14    let obj = doc.as_object().ok_or_else(|| NookError::Schema {
15        msg: "document must be an object".into(),
16    })?;
17
18    for key in obj.keys() {
19        if c.field(key).is_none() {
20            return Err(NookError::Schema {
21                msg: format!("unknown field {key:?}"),
22            });
23        }
24    }
25
26    for f in &c.fields {
27        match obj.get(&f.name) {
28            None => {
29                if !f.optional {
30                    return Err(NookError::Schema {
31                        msg: format!("missing required field {:?}", f.name),
32                    });
33                }
34            }
35            Some(Value::Null) => {
36                if !f.nullable {
37                    return Err(NookError::Schema {
38                        msg: format!("field {:?} is not nullable", f.name),
39                    });
40                }
41            }
42            Some(v) => check_field(f, v)?,
43        }
44    }
45
46    Ok(())
47}
48
49fn check_field(f: &FieldIr, v: &Value) -> Result<(), NookError> {
50    check_value(&f.ty, f, v, &f.name)
51}
52
53/// Recursive value checker. Carries the original [`FieldIr`] for constraint
54/// access (min/max/email/regex/variants) and a slash/index-tagged `path`
55/// for human-readable error messages on nested values.
56///
57/// Per-item constraints on inner array types are out of scope for M5c: the
58/// `s.array(s.string())` surface attaches `min/.max/.regex/.email` only to
59/// the outer field, so the inner recursion intentionally reuses the same
60/// `FieldIr` constraint slots (they are no-ops on the inner type for the
61/// shapes the JS surface can produce — `s.array(s.string().min(1))` is not
62/// yet on the TS API).
63fn check_value(ty: &FieldType, f: &FieldIr, v: &Value, path: &str) -> Result<(), NookError> {
64    let bad = |m: String| NookError::Schema { msg: m };
65
66    match ty {
67        FieldType::Id | FieldType::String => {
68            let s = v
69                .as_str()
70                .ok_or_else(|| bad(format!("field {path:?} must be a string")))?;
71
72            // Convert char count to f64 without precision loss.
73            // u32::MAX (4_294_967_295) is below 2^32, well within f64's exact integer range
74            // (2^53), so f64::from(u32) is lossless.  Strings longer than u32::MAX chars
75            // are clamped to u32::MAX, which will correctly fail any sane max bound.
76            //
77            // CONTRACT: length is measured in Unicode scalar values (Rust `char`s).
78            // This is the authoritative definition (PRD §3 — the Rust core is the sole
79            // validation authority). It may differ from a JS schema author's UTF-16
80            // `String.length` (and from grapheme-cluster intuition) for astral/combining
81            // characters; the Rust scalar count is canonical by fiat, not a bug.
82            let char_count = s.chars().count();
83            let len_f64 = f64::from(u32::try_from(char_count).unwrap_or(u32::MAX));
84
85            if f.min.is_some_and(|m| len_f64 < m) || f.max.is_some_and(|m| len_f64 > m) {
86                return Err(bad(format!("field {path:?} length out of range")));
87            }
88            if f.email && !s.contains('@') {
89                return Err(bad(format!("field {path:?} must be an email")));
90            }
91            if let Some(re_src) = &f.regex {
92                let re = regex::Regex::new(re_src)
93                    .map_err(|e| bad(format!("field {path:?} has invalid regex pattern: {e}")))?;
94                if !re.is_match(s) {
95                    return Err(bad(format!("field {path:?} does not match pattern")));
96                }
97            }
98        }
99        FieldType::Number => {
100            let n = v
101                .as_f64()
102                .ok_or_else(|| bad(format!("field {path:?} must be a number")))?;
103            // For |n| >= 2^53, f64 cannot represent a fractional part, so `fract()` is
104            // always 0.0 and such values pass the integer check. This is inherent to
105            // JSON's f64 number model (serde_json default features) and out of M2 scope;
106            // `max` is the intended guard for out-of-range magnitudes. NaN/Infinity are
107            // unreachable here — serde_json rejects them at parse time.
108            if f.int && n.fract() != 0.0 {
109                return Err(bad(format!("field {path:?} must be an integer")));
110            }
111            if f.min.is_some_and(|m| n < m) || f.max.is_some_and(|m| n > m) {
112                return Err(bad(format!("field {path:?} out of range")));
113            }
114        }
115        FieldType::Bool => {
116            if !v.is_boolean() {
117                return Err(bad(format!("field {path:?} must be a boolean")));
118            }
119        }
120        FieldType::Enum => {
121            let s = v
122                .as_str()
123                .ok_or_else(|| bad(format!("field {path:?} must be a string")))?;
124            if !f.variants.iter().any(|x| x == s) {
125                return Err(bad(format!("field {path:?} not a valid variant")));
126            }
127        }
128        FieldType::Date => {
129            // Schema-driven JSON stores Date as an ISO-8601 string.
130            if !v.is_string() {
131                return Err(bad(format!("field {path:?} must be an ISO date string")));
132            }
133        }
134        FieldType::Array(item_ty) => {
135            let arr = v
136                .as_array()
137                .ok_or_else(|| bad(format!("field {path:?} must be an array")))?;
138            for (i, item) in arr.iter().enumerate() {
139                check_value(item_ty, f, item, &format!("{path}[{i}]"))?;
140            }
141        }
142    }
143
144    Ok(())
145}
146
#[cfg(test)]
mod tests {
    //! Unit and property tests for `validate_document`.
    use super::*;
    use crate::schema::ir::SchemaIr;
    use serde_json::json;

    /// Schema with one collection `u`: a string `name` of 1..=3 chars, an
    /// enum `role` with variants `a`/`b`, and an optional non-negative
    /// integer `age`.
    fn ir() -> SchemaIr {
        SchemaIr::compile(
            r#"{"u":{"idField":"id","fields":[
          {"name":"id","type":"id"},
          {"name":"name","type":"string","min":1,"max":3},
          {"name":"role","type":"enum","variants":["a","b"]},
          {"name":"age","type":"number","int":true,"min":0,"optional":true}],
          "indexes":[]}}"#,
        )
        .unwrap()
    }

    /// Schema with one collection `c` whose `tags` field is an array of strings.
    fn tags_ir() -> SchemaIr {
        SchemaIr::compile(
            r#"{"c":{"idField":"id","fields":[
                  {"name":"id","type":"id"},
                  {"name":"tags","type":"array","items":{"type":"string"}}],
                  "indexes":[]}}"#,
        )
        .unwrap()
    }

    #[test]
    fn accepts_valid_document() {
        let c = ir();
        let c = c.collection("u").unwrap();
        validate_document(c, &json!({"id":"x","name":"Al","role":"a"})).unwrap();
    }

    #[test]
    fn rejects_wrong_type() {
        let c = ir();
        let c = c.collection("u").unwrap();
        let e = validate_document(c, &json!({"id":"x","name":5,"role":"a"})).unwrap_err();
        assert_eq!(e.kind(), crate::error::NookErrorKind::Schema);
    }

    #[test]
    fn rejects_string_too_long_and_bad_enum_and_missing_required() {
        let c = ir();
        let c = c.collection("u").unwrap();
        assert!(validate_document(c, &json!({"id":"x","name":"AAAA","role":"a"})).is_err());
        assert!(validate_document(c, &json!({"id":"x","name":"Al","role":"z"})).is_err());
        assert!(validate_document(c, &json!({"id":"x","role":"a"})).is_err());
    }

    #[test]
    fn allows_absent_optional_but_rejects_non_int_number() {
        let c = ir();
        let c = c.collection("u").unwrap();
        validate_document(c, &json!({"id":"x","name":"Al","role":"a"})).unwrap();
        assert!(validate_document(c, &json!({"id":"x","name":"Al","role":"a","age":1.5})).is_err());
    }

    proptest::proptest! {
        // Length is counted in `char`s, matching the validator's contract.
        #[test]
        fn name_length_bound_is_enforced(s in ".*") {
            let c = ir();
            let c = c.collection("u").unwrap();
            let r = validate_document(c, &json!({"id":"x","name":s,"role":"a"}));
            let len = s.chars().count();
            proptest::prop_assert_eq!(r.is_ok(), (1..=3).contains(&len));
        }
    }

    #[test]
    fn array_field_round_trip_succeeds() {
        let ir = tags_ir();
        let c = ir.collection("c").unwrap();
        validate_document(c, &json!({"id":"x","tags":["a","b","c"]})).unwrap();
        validate_document(c, &json!({"id":"x","tags":[]})).unwrap(); // empty OK
    }

    #[test]
    fn array_item_type_mismatch_rejected() {
        let ir = tags_ir();
        let c = ir.collection("c").unwrap();
        let e = validate_document(c, &json!({"id":"x","tags":["a", 42]})).unwrap_err();
        let msg = e.to_string();
        // Require the index-tagged path: a bare "tags" match would also pass
        // if the item index were dropped from the error.
        assert!(
            msg.contains("tags[1]"),
            "expected path-tagged error, got: {msg}"
        );
    }

    #[test]
    fn nested_array_validates_recursively() {
        let ir = SchemaIr::compile(
            r#"{"c":{"idField":"id","fields":[
                  {"name":"id","type":"id"},
                  {"name":"matrix","type":"array","items":{"type":"array","items":{"type":"number"}}}],
                  "indexes":[]}}"#,
        )
        .unwrap();
        let c = ir.collection("c").unwrap();
        validate_document(c, &json!({"id":"x","matrix":[[1.0, 2.0],[3.0]]})).unwrap();
        assert!(validate_document(c, &json!({"id":"x","matrix":[["not-a-number"]]})).is_err());
    }
}
253}