Skip to main content

modelvault_core/
validation.rs

1//! Validation at write time: types, nesting, and field constraints (0.6+).
2//!
3//! See [`docs/07_record_encoding_v2.md`](../../docs/07_record_encoding_v2.md) and [`ROADMAP.md`](../../ROADMAP.md).
4
5use regex::Regex;
6
7use crate::error::{DbError, ValidationError};
8use crate::record::RowValue;
9use crate::schema::{Constraint, FieldDef, Type};
10
11fn err(path: &[String], msg: impl Into<String>) -> DbError {
12    DbError::Validation(ValidationError {
13        path: path.to_vec(),
14        message: msg.into(),
15    })
16}
17
18/// Primary key types must be flat primitives (not optional/composite).
19pub fn ensure_pk_type_primitive(ty: &Type) -> Result<(), DbError> {
20    match ty {
21        Type::Bool
22        | Type::Int64
23        | Type::Uint64
24        | Type::Float64
25        | Type::String
26        | Type::Bytes
27        | Type::Uuid
28        | Type::Timestamp => Ok(()),
29        Type::Optional(_) | Type::List(_) | Type::Object(_) | Type::Enum(_) => {
30            Err(DbError::Validation(ValidationError {
31                path: vec![],
32                message:
33                    "primary key field must use a primitive type (not optional/list/object/enum)"
34                        .into(),
35            }))
36        }
37    }
38}
39
40/// Whether a missing map key is treated as absent (`Optional` only).
41pub fn allows_absent_root(ty: &Type) -> bool {
42    matches!(ty, Type::Optional(_))
43}
44
45/// Validate a row value against `ty` and apply `constraints`.
46pub fn validate_value(
47    path: &mut Vec<String>,
48    ty: &Type,
49    constraints: &[Constraint],
50    v: &RowValue,
51) -> Result<(), DbError> {
52    match ty {
53        Type::Optional(inner) => {
54            if matches!(v, RowValue::None) {
55                return Ok(());
56            }
57            validate_value(path, inner, &[], v)?;
58            apply_constraints(path, ty, constraints, v)
59        }
60        Type::Bool => {
61            let RowValue::Bool(_) = v else {
62                return Err(err(path, "expected bool"));
63            };
64            apply_constraints(path, ty, constraints, v)
65        }
66        Type::Int64 => {
67            let RowValue::Int64(_) = v else {
68                return Err(err(path, "expected int64"));
69            };
70            apply_constraints(path, ty, constraints, v)
71        }
72        Type::Uint64 => {
73            let RowValue::Uint64(_) = v else {
74                return Err(err(path, "expected uint64"));
75            };
76            apply_constraints(path, ty, constraints, v)
77        }
78        Type::Float64 => {
79            let RowValue::Float64(_) = v else {
80                return Err(err(path, "expected float64"));
81            };
82            apply_constraints(path, ty, constraints, v)
83        }
84        Type::String => {
85            let RowValue::String(_) = v else {
86                return Err(err(path, "expected string"));
87            };
88            apply_constraints(path, ty, constraints, v)
89        }
90        Type::Bytes => {
91            let RowValue::Bytes(_) = v else {
92                return Err(err(path, "expected bytes"));
93            };
94            apply_constraints(path, ty, constraints, v)
95        }
96        Type::Uuid => {
97            let RowValue::Uuid(_) = v else {
98                return Err(err(path, "expected uuid"));
99            };
100            apply_constraints(path, ty, constraints, v)
101        }
102        Type::Timestamp => {
103            let RowValue::Timestamp(_) = v else {
104                return Err(err(path, "expected timestamp"));
105            };
106            apply_constraints(path, ty, constraints, v)
107        }
108        Type::List(inner) => {
109            let RowValue::List(items) = v else {
110                return Err(err(path, "expected list"));
111            };
112            for (i, item) in items.iter().enumerate() {
113                path.push(format!("{i}"));
114                validate_value(path, inner, &[], item)?;
115                path.pop();
116            }
117            apply_constraints(path, ty, constraints, v)
118        }
119        Type::Object(fields) => {
120            let RowValue::Object(m) = v else {
121                return Err(err(path, "expected object"));
122            };
123            for sub in fields {
124                let key = sub.path.0[0].to_string();
125                let absent_ok = allows_absent_root(&sub.ty);
126                let none = RowValue::None;
127                let child: &RowValue = match m.get(&key) {
128                    None if absent_ok => &none,
129                    None => {
130                        path.push(key.clone());
131                        return Err(err(path, "missing object field"));
132                    }
133                    Some(x) => x,
134                };
135                path.push(key);
136                validate_value(path, &sub.ty, &sub.constraints, child)?;
137                path.pop();
138            }
139            for k in m.keys() {
140                if !fields.iter().any(|f| f.path.0[0].as_ref() == k.as_str()) {
141                    path.push(k.clone());
142                    return Err(err(path, "unknown field in object"));
143                }
144            }
145            apply_constraints(path, ty, constraints, v)
146        }
147        Type::Enum(variants) => {
148            let RowValue::String(s) = v else {
149                return Err(err(path, "expected string (enum)"));
150            };
151            if !variants.iter().any(|x| x == s) {
152                return Err(err(
153                    path,
154                    format!("enum value must be one of {:?}", variants),
155                ));
156            }
157            apply_constraints(path, ty, constraints, v)
158        }
159    }
160}
161
162fn must_int64(path: &[String], v: &RowValue, requirement: &'static str) -> Result<i64, DbError> {
163    let RowValue::Int64(n) = v else {
164        return Err(err(path, requirement));
165    };
166    Ok(*n)
167}
168
169fn must_uint64(path: &[String], v: &RowValue, requirement: &'static str) -> Result<u64, DbError> {
170    let RowValue::Uint64(n) = v else {
171        return Err(err(path, requirement));
172    };
173    Ok(*n)
174}
175
176fn must_f64(path: &[String], v: &RowValue, requirement: &'static str) -> Result<f64, DbError> {
177    let RowValue::Float64(n) = v else {
178        return Err(err(path, requirement));
179    };
180    Ok(*n)
181}
182
183fn constrain_min_i64(path: &[String], n: i64, min: i64) -> Result<(), DbError> {
184    if n < min {
185        Err(err(path, format!("value {n} is below minimum {min}")))
186    } else {
187        Ok(())
188    }
189}
190
191fn constrain_max_i64(path: &[String], n: i64, max: i64) -> Result<(), DbError> {
192    if n > max {
193        Err(err(path, format!("value {n} is above maximum {max}")))
194    } else {
195        Ok(())
196    }
197}
198
199fn constrain_min_u64(path: &[String], n: u64, min: u64) -> Result<(), DbError> {
200    if n < min {
201        Err(err(path, format!("value {n} is below minimum {min}")))
202    } else {
203        Ok(())
204    }
205}
206
207fn constrain_max_u64(path: &[String], n: u64, max: u64) -> Result<(), DbError> {
208    if n > max {
209        Err(err(path, format!("value {n} is above maximum {max}")))
210    } else {
211        Ok(())
212    }
213}
214
215fn constrain_min_f64(path: &[String], n: f64, min: f64) -> Result<(), DbError> {
216    if n < min {
217        Err(err(path, format!("value {n} is below minimum {min}")))
218    } else {
219        Ok(())
220    }
221}
222
223fn constrain_max_f64(path: &[String], n: f64, max: f64) -> Result<(), DbError> {
224    if n > max {
225        Err(err(path, format!("value {n} is above maximum {max}")))
226    } else {
227        Ok(())
228    }
229}
230
231fn constrain_min_byte_len(
232    path: &[String],
233    len: usize,
234    min: u64,
235    kind: &str,
236) -> Result<(), DbError> {
237    if (len as u64) < min {
238        Err(err(
239            path,
240            format!("{kind} length {len} is below minimum {min}"),
241        ))
242    } else {
243        Ok(())
244    }
245}
246
247fn constrain_max_byte_len(
248    path: &[String],
249    len: usize,
250    max: u64,
251    kind: &str,
252) -> Result<(), DbError> {
253    if (len as u64) > max {
254        Err(err(
255            path,
256            format!("{kind} length {len} is above maximum {max}"),
257        ))
258    } else {
259        Ok(())
260    }
261}
262
263fn apply_constraints(
264    path: &[String],
265    _ty: &Type,
266    constraints: &[Constraint],
267    v: &RowValue,
268) -> Result<(), DbError> {
269    for c in constraints {
270        match c {
271            Constraint::MinI64(min) => {
272                let n = must_int64(path, v, "MinI64 constraint requires int64")?;
273                constrain_min_i64(path, n, *min)?;
274            }
275            Constraint::MaxI64(max) => {
276                let n = must_int64(path, v, "MaxI64 constraint requires int64")?;
277                constrain_max_i64(path, n, *max)?;
278            }
279            Constraint::MinU64(min) => {
280                let n = must_uint64(path, v, "MinU64 constraint requires uint64")?;
281                constrain_min_u64(path, n, *min)?;
282            }
283            Constraint::MaxU64(max) => {
284                let n = must_uint64(path, v, "MaxU64 constraint requires uint64")?;
285                constrain_max_u64(path, n, *max)?;
286            }
287            Constraint::MinF64(min) => {
288                let n = must_f64(path, v, "MinF64 constraint requires float64")?;
289                constrain_min_f64(path, n, *min)?;
290            }
291            Constraint::MaxF64(max) => {
292                let n = must_f64(path, v, "MaxF64 constraint requires float64")?;
293                constrain_max_f64(path, n, *max)?;
294            }
295            Constraint::MinLength(min) => match v {
296                RowValue::String(s) => constrain_min_byte_len(path, s.len(), *min, "string")?,
297                RowValue::Bytes(b) => constrain_min_byte_len(path, b.len(), *min, "bytes")?,
298                RowValue::List(items) => constrain_min_byte_len(path, items.len(), *min, "list")?,
299                _ => return Err(err(path, "MinLength applies to string, bytes, or list")),
300            },
301            Constraint::MaxLength(max) => match v {
302                RowValue::String(s) => constrain_max_byte_len(path, s.len(), *max, "string")?,
303                RowValue::Bytes(b) => constrain_max_byte_len(path, b.len(), *max, "bytes")?,
304                RowValue::List(items) => constrain_max_byte_len(path, items.len(), *max, "list")?,
305                _ => return Err(err(path, "MaxLength applies to string, bytes, or list")),
306            },
307            Constraint::Regex(pattern) => {
308                let RowValue::String(s) = v else {
309                    return Err(err(path, "Regex constraint requires string"));
310                };
311                let re = Regex::new(pattern).map_err(|e| {
312                    DbError::Validation(ValidationError {
313                        path: path.to_vec(),
314                        message: format!("invalid regex in schema: {e}"),
315                    })
316                })?;
317                if !re.is_match(s) {
318                    return Err(err(path, "string does not match regex"));
319                }
320            }
321            Constraint::Email => {
322                let RowValue::String(s) = v else {
323                    return Err(err(path, "Email constraint requires string"));
324                };
325                if !s.contains('@') || !s.contains('.') {
326                    return Err(err(path, "string is not a valid email shape"));
327                }
328            }
329            Constraint::Url => {
330                let RowValue::String(s) = v else {
331                    return Err(err(path, "Url constraint requires string"));
332                };
333                if !s.starts_with("http://") && !s.starts_with("https://") {
334                    return Err(err(path, "string must be an http(s) URL"));
335                }
336            }
337            Constraint::NonEmpty => match v {
338                RowValue::String(s) if s.is_empty() => {
339                    return Err(err(path, "string must be non-empty"));
340                }
341                RowValue::Bytes(b) if b.is_empty() => {
342                    return Err(err(path, "bytes must be non-empty"));
343                }
344                RowValue::List(items) if items.is_empty() => {
345                    return Err(err(path, "list must be non-empty"));
346                }
347                RowValue::String(_) | RowValue::Bytes(_) | RowValue::List(_) => {}
348                _ => return Err(err(path, "NonEmpty applies to string, bytes, or list")),
349            },
350        }
351    }
352
353    Ok(())
354}
355
356/// Validate top-level insert row: unknown fields, missing fields, types, constraints.
357/// `row` must contain every top-level field (including the primary key).
358pub fn validate_top_level_row(
359    fields: &[FieldDef],
360    pk_name: &str,
361    row: &std::collections::BTreeMap<String, RowValue>,
362) -> Result<(), DbError> {
363    for k in row.keys() {
364        if !fields
365            .iter()
366            .any(|f| f.path.0.len() == 1 && f.path.0[0].as_ref() == k.as_str())
367        {
368            return Err(DbError::Validation(ValidationError {
369                path: vec![k.clone()],
370                message: "unknown field".into(),
371            }));
372        }
373    }
374
375    for def in fields {
376        let name = def.path.0[0].to_string();
377        if name == pk_name {
378            continue;
379        }
380        let absent_ok = allows_absent_root(&def.ty);
381        let none = RowValue::None;
382        let v: &RowValue = match row.get(&name) {
383            None if absent_ok => &none,
384            None => {
385                return Err(DbError::Validation(ValidationError {
386                    path: vec![name.clone()],
387                    message: "missing field".into(),
388                }));
389            }
390            Some(x) => x,
391        };
392        if matches!(v, RowValue::None) && !absent_ok {
393            return Err(DbError::Validation(ValidationError {
394                path: vec![name.clone()],
395                message: "unexpected null for required field".into(),
396            }));
397        }
398        let mut path = vec![name.clone()];
399        validate_value(&mut path, &def.ty, &def.constraints, v)?;
400    }
401    Ok(())
402}
403
404/// Validate a row against a multi-segment schema (types, constraints, and unknown paths).
405pub fn validate_multiseg_row(
406    fields: &[FieldDef],
407    pk_name: &str,
408    row: &std::collections::BTreeMap<String, RowValue>,
409) -> Result<(), DbError> {
410    crate::db::validate_unknown_fields_for_multiseg_schema(fields, pk_name, row)?;
411    for def in fields {
412        if def.path.0.len() == 1 && def.path.0[0] == pk_name {
413            continue;
414        }
415        let mut path: Vec<String> = def.path.0.iter().map(|s| s.as_ref().to_string()).collect();
416        let absent_ok = allows_absent_root(&def.ty);
417        let v = match crate::db::row_value_at_path(row, &def.path.0) {
418            Some(x) => x,
419            None if absent_ok => RowValue::None,
420            None => {
421                return Err(DbError::Schema(
422                    crate::error::SchemaError::RowMissingField {
423                        name: path.join("."),
424                    },
425                ));
426            }
427        };
428        if matches!(v, RowValue::None) && !absent_ok {
429            return Err(DbError::Validation(ValidationError {
430                path: path.clone(),
431                message: "unexpected null for required field".into(),
432            }));
433        }
434        validate_value(&mut path, &def.ty, &def.constraints, &v)?;
435    }
436    Ok(())
437}
438
#[cfg(test)]
mod constraint_helper_cover_tests {
    use super::*;
    use crate::error::DbError;

    /// Asserts that `e` is a validation error whose message mentions "above maximum".
    fn assert_above_maximum(e: &DbError) {
        match e {
            DbError::Validation(v) => assert!(v.message.contains("above maximum")),
            _ => panic!("expected DbError::Validation"),
        }
    }

    /// Every helper returns `Ok` when the value sits inside its bound.
    #[test]
    fn constrain_helpers_accept_in_range_values() {
        let path: Vec<String> = vec!["z".to_string()];

        assert!(constrain_min_i64(&path, 5, 1).is_ok());
        assert!(constrain_max_i64(&path, 1, 10).is_ok());
        assert!(constrain_min_u64(&path, 5, 1).is_ok());
        assert!(constrain_max_u64(&path, 1, 10).is_ok());
        assert!(constrain_min_f64(&path, 5.0, 1.0).is_ok());
        assert!(constrain_max_f64(&path, 1.0, 10.0).is_ok());

        assert!(constrain_min_byte_len(&path, "abcde".len(), 1, "string").is_ok());
        assert!(constrain_max_byte_len(&path, "ab".len(), 10, "string").is_ok());
        assert!(constrain_min_byte_len(&path, [1u8, 2, 3].len(), 2, "bytes").is_ok());
        assert!(constrain_max_byte_len(&path, [1u8].len(), 4, "bytes").is_ok());
    }

    /// Numeric max helpers report "above maximum"; the i64 case also pins the error path.
    #[test]
    fn constrain_max_numeric_helpers_surface_above_max_messages() {
        let path: Vec<String> = vec!["x".to_string()];

        let signed = constrain_max_i64(&path, 3, 1).unwrap_err();
        assert_above_maximum(&signed);
        match &signed {
            DbError::Validation(v) => assert!(v.path == path),
            _ => panic!("expected DbError::Validation"),
        }

        let unsigned = constrain_max_u64(&path, 5, 1).unwrap_err();
        assert_above_maximum(&unsigned);

        let float = constrain_max_f64(&path, 3.5, 1.25).unwrap_err();
        assert_above_maximum(&float);
    }

    /// The length max helper reports "above maximum" for both string and bytes kinds.
    #[test]
    fn constrain_max_byte_len_string_and_bytes_surface_above_max() {
        let path: Vec<String> = vec!["f".to_string()];

        let text = constrain_max_byte_len(&path, "ab".len(), 1, "string").unwrap_err();
        assert_above_maximum(&text);

        let raw = constrain_max_byte_len(&path, [1u8, 2].len(), 1, "bytes").unwrap_err();
        assert_above_maximum(&raw);
    }
}