1use std::collections::BTreeMap;
3
4use serde::Deserialize;
5
6use crate::error::NookError;
7
/// Type tag of a schema field, parsed from a descriptor's `"type"` string.
///
/// `Array` carries its element type, so nested arrays are representable.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FieldType {
    /// Descriptor type `"id"`. A collection's `idField` must name a field of this type.
    Id,
    /// Descriptor type `"string"`.
    String,
    /// Descriptor type `"number"`.
    Number,
    /// Descriptor type `"boolean"`.
    Bool,
    /// Descriptor type `"enum"`.
    Enum,
    /// Descriptor type `"date"`.
    Date,
    /// Descriptor type `"array"`; the boxed value is the type parsed from its `items` descriptor.
    Array(Box<Self>),
}
26
/// Compiled form of one top-level field descriptor.
///
/// `ty` is parsed from the descriptor's `"type"` string; every other member is copied
/// unchanged from the raw descriptor. All members are fed into `SchemaIr::schema_hash`.
// The four bools mirror four independent descriptor flags one-to-one, which is why the
// `struct_excessive_bools` lint is silenced here.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone)]
pub struct FieldIr {
    /// Field name as declared in the descriptor.
    pub name: String,
    /// Parsed field type.
    pub ty: FieldType,
    /// Descriptor `optional` flag (`false` when absent). Indexed fields must not set it.
    pub optional: bool,
    /// Descriptor `nullable` flag (`false` when absent). Indexed fields must not set it.
    pub nullable: bool,
    /// Descriptor `min` value, if present.
    /// NOTE(review): presumably a lower bound; enforcement is not visible in this file.
    pub min: Option<f64>,
    /// Descriptor `max` value, if present.
    /// NOTE(review): presumably an upper bound; enforcement is not visible in this file.
    pub max: Option<f64>,
    /// Descriptor `int` flag (`false` when absent).
    pub int: bool,
    /// Descriptor `email` flag (`false` when absent).
    pub email: bool,
    /// Descriptor `regex` string, if present. It is stored as text and not compiled here.
    pub regex: Option<String>,
    /// Descriptor `variants` list in declared order (empty when absent).
    /// Declared order is significant for `schema_hash`.
    pub variants: Vec<String>,
}
56
/// Compiled index declaration for a single field of a collection.
#[derive(Debug, Clone)]
pub struct IndexIr {
    /// Name of the indexed field. `compile` guarantees it exists in the same collection,
    /// is neither optional nor nullable, and is not an array.
    pub field: String,
    /// Descriptor `unique` flag (`false` when absent).
    pub unique: bool,
}
65
/// Compiled form of one collection: its id field name, its fields, and its indexes.
#[derive(Debug, Clone)]
pub struct CollectionIr {
    /// Name of the id field; `compile` checks that a field with this name and type `"id"` exists.
    pub id_field: String,
    /// Fields in descriptor declaration order.
    pub fields: Vec<FieldIr>,
    /// Indexes in descriptor declaration order.
    pub indexes: Vec<IndexIr>,
}
76
77impl CollectionIr {
78 #[must_use]
80 pub fn field(&self, name: &str) -> Option<&FieldIr> {
81 self.fields.iter().find(|f| f.name == name)
82 }
83}
84
/// Compiled schema: every collection keyed by its name.
///
/// A `BTreeMap` iterates in key order, so `schema_hash` sees collections in the same
/// order regardless of how the descriptor JSON ordered its top-level keys.
#[derive(Debug, Clone)]
pub struct SchemaIr {
    /// Collections by name; only populated through `SchemaIr::compile`.
    collections: BTreeMap<String, CollectionIr>,
}
91
/// Serde mirror of one field descriptor as it appears in the descriptor JSON.
///
/// The same shape is used for top-level fields and for nested array `items` descriptors.
// The bools map one-to-one onto independent JSON flags, hence the lint exemption.
#[allow(clippy::struct_excessive_bools)]
#[derive(Deserialize)]
struct RawField {
    /// Field name. Defaults to the empty string so that nameless `items` descriptors
    /// (e.g. `{"type":"string"}`) still deserialize.
    #[serde(default)]
    name: String,
    /// Raw type string from the JSON key `type`; interpreted by `parse_field_type_recursive`.
    #[serde(rename = "type")]
    ty: String,
    /// `false` when the key is absent.
    #[serde(default)]
    optional: bool,
    /// `false` when the key is absent.
    #[serde(default)]
    nullable: bool,
    /// `None` when the key is absent.
    min: Option<f64>,
    /// `None` when the key is absent.
    max: Option<f64>,
    /// `false` when the key is absent.
    #[serde(default)]
    int: bool,
    /// `false` when the key is absent.
    #[serde(default)]
    email: bool,
    /// `None` when the key is absent.
    #[serde(default)]
    regex: Option<String>,
    /// Empty when the key is absent.
    #[serde(default)]
    variants: Vec<String>,
    /// Element descriptor; required when `ty` is `"array"`, otherwise unused by this file.
    items: Option<Box<Self>>,
}
123
/// Serde mirror of one index declaration in the descriptor JSON.
#[derive(Deserialize)]
struct RawIndex {
    /// Name of the field to index (required key).
    field: String,
    /// `false` when the key is absent.
    #[serde(default)]
    unique: bool,
}
130
/// Serde mirror of one collection entry in the descriptor JSON.
#[derive(Deserialize)]
struct RawCollection {
    /// Name of the id field; read from the camelCase JSON key `idField`.
    #[serde(rename = "idField")]
    id_field: String,
    /// Field descriptors in declaration order (required key).
    fields: Vec<RawField>,
    /// Index declarations; empty when the key is absent.
    #[serde(default)]
    indexes: Vec<RawIndex>,
}
139
140impl SchemaIr {
143 pub fn compile(descriptor_json: &str) -> Result<Self, NookError> {
154 let raw: BTreeMap<String, RawCollection> =
155 serde_json::from_str(descriptor_json).map_err(|e| NookError::Schema {
156 msg: format!("invalid descriptor: {e}"),
157 })?;
158
159 let mut collections = BTreeMap::new();
160 for (cname, rc) in raw {
161 if cname == "_meta" {
167 return Err(NookError::Schema {
168 msg: r#"collection name "_meta" is reserved (migration ledger)"#.to_string(),
169 });
170 }
171 let fields = Self::compile_fields(&cname, &rc.fields)?;
172 Self::validate_id_field(&cname, &rc.id_field, &fields)?;
173 let indexes = Self::compile_indexes(&cname, &rc.indexes, &fields)?;
174 collections.insert(
175 cname,
176 CollectionIr {
177 id_field: rc.id_field,
178 fields,
179 indexes,
180 },
181 );
182 }
183 Ok(Self { collections })
184 }
185
186 #[must_use]
188 pub fn collection(&self, name: &str) -> Option<&CollectionIr> {
189 self.collections.get(name)
190 }
191
192 #[must_use]
206 pub fn schema_hash(&self) -> [u8; 32] {
207 use sha2::{Digest, Sha256};
208
209 fn feed<D: Digest>(h: &mut D, bytes: &[u8]) {
212 let len = u64::try_from(bytes.len()).expect("schema component length fits in u64");
213 h.update(len.to_le_bytes());
214 h.update(bytes);
215 }
216
217 fn feed_field_type<D: Digest>(h: &mut D, ft: &FieldType) {
223 match ft {
224 FieldType::Id => h.update([0x01u8]),
225 FieldType::String => h.update([0x02u8]),
226 FieldType::Number => h.update([0x03u8]),
227 FieldType::Bool => h.update([0x04u8]),
228 FieldType::Date => h.update([0x05u8]),
229 FieldType::Enum => h.update([0x06u8]),
230 FieldType::Array(inner) => {
231 h.update([0x10u8]);
232 feed_field_type(h, inner);
233 }
234 }
235 }
236
237 let mut h = Sha256::new();
238
239 for (cn, c) in &self.collections {
240 feed(&mut h, cn.as_bytes());
241 feed(&mut h, c.id_field.as_bytes());
242
243 let fields_len =
244 u64::try_from(c.fields.len()).expect("schema component length fits in u64");
245 h.update(fields_len.to_le_bytes());
246
247 for f in &c.fields {
248 feed(&mut h, f.name.as_bytes());
249 feed_field_type(&mut h, &f.ty);
250 h.update([u8::from(f.optional)]);
251 h.update([u8::from(f.nullable)]);
252 h.update([u8::from(f.int)]);
253 h.update([u8::from(f.email)]);
254
255 h.update([u8::from(f.min.is_some())]);
257 if let Some(m) = f.min {
258 h.update(m.to_le_bytes());
259 }
260 h.update([u8::from(f.max.is_some())]);
261 if let Some(m) = f.max {
262 h.update(m.to_le_bytes());
263 }
264
265 h.update([u8::from(f.regex.is_some())]);
267 if let Some(re) = &f.regex {
268 feed(&mut h, re.as_bytes());
269 }
270
271 let variants_len =
273 u64::try_from(f.variants.len()).expect("schema component length fits in u64");
274 h.update(variants_len.to_le_bytes());
275 for v in &f.variants {
276 feed(&mut h, v.as_bytes());
277 }
278 }
279
280 let indexes_len =
281 u64::try_from(c.indexes.len()).expect("schema component length fits in u64");
282 h.update(indexes_len.to_le_bytes());
283 for i in &c.indexes {
284 feed(&mut h, i.field.as_bytes());
285 h.update([u8::from(i.unique)]);
286 }
287 }
288
289 h.finalize().into()
290 }
291
292 fn compile_fields(cname: &str, raw_fields: &[RawField]) -> Result<Vec<FieldIr>, NookError> {
295 let mut fields = Vec::with_capacity(raw_fields.len());
296 for f in raw_fields {
297 let ty = Self::parse_field_type_recursive(f, cname)?;
298 fields.push(FieldIr {
299 name: f.name.clone(),
300 ty,
301 optional: f.optional,
302 nullable: f.nullable,
303 min: f.min,
304 max: f.max,
305 int: f.int,
306 email: f.email,
307 regex: f.regex.clone(),
308 variants: f.variants.clone(),
309 });
310 }
311 Ok(fields)
312 }
313
314 fn parse_field_type_recursive(raw: &RawField, cname: &str) -> Result<FieldType, NookError> {
315 match raw.ty.as_str() {
316 "id" => Ok(FieldType::Id),
317 "string" => Ok(FieldType::String),
318 "number" => Ok(FieldType::Number),
319 "boolean" => Ok(FieldType::Bool),
320 "enum" => Ok(FieldType::Enum),
321 "date" => Ok(FieldType::Date),
322 "array" => {
323 let items = raw.items.as_deref().ok_or_else(|| NookError::Schema {
324 msg: format!(
325 "array field {:?} in collection {cname:?} missing 'items' descriptor",
326 raw.name,
327 ),
328 })?;
329 if items.ty == "id" {
330 return Err(NookError::Schema {
331 msg: format!(
332 "array field {:?} in collection {cname:?}: id is not a valid array item type",
333 raw.name,
334 ),
335 });
336 }
337 let inner = Self::parse_field_type_recursive(items, cname)?;
338 Ok(FieldType::Array(Box::new(inner)))
339 }
340 other => Err(NookError::Schema {
341 msg: format!("unknown field type {other:?} in collection {cname:?}"),
342 }),
343 }
344 }
345
346 fn validate_id_field(cname: &str, id_field: &str, fields: &[FieldIr]) -> Result<(), NookError> {
347 if !fields
348 .iter()
349 .any(|f| f.name == id_field && f.ty == FieldType::Id)
350 {
351 return Err(NookError::Schema {
352 msg: format!("collection {cname:?} missing id field {id_field:?} with type \"id\""),
353 });
354 }
355 Ok(())
356 }
357
358 fn compile_indexes(
359 cname: &str,
360 raw_indexes: &[RawIndex],
361 fields: &[FieldIr],
362 ) -> Result<Vec<IndexIr>, NookError> {
363 let mut indexes = Vec::with_capacity(raw_indexes.len());
364 for idx in raw_indexes {
365 let Some(fld) = fields.iter().find(|f| f.name == idx.field) else {
366 return Err(NookError::Schema {
367 msg: format!(
368 "index on unknown field {:?} in collection {cname:?}",
369 idx.field
370 ),
371 });
372 };
373 if fld.optional || fld.nullable {
374 return Err(NookError::Schema {
375 msg: format!(
376 "index requires a required, non-null field; {:?} is optional/nullable",
377 idx.field
378 ),
379 });
380 }
381 if matches!(fld.ty, FieldType::Array(_)) {
382 return Err(NookError::Schema {
383 msg: format!(
384 "cannot index array field {:?} in collection {cname:?} (M5c limitation: composite-key index codec stores scalars only)",
385 idx.field,
386 ),
387 });
388 }
389 indexes.push(IndexIr {
390 field: idx.field.clone(),
391 unique: idx.unique,
392 });
393 }
394 Ok(indexes)
395 }
396}
397
398#[cfg(test)]
399mod tests {
400 use super::*;
401
    /// Shared valid descriptor: one `users` collection with four fields (`id`,
    /// `email`, `role`, optional `age`) and two indexes (`email` unique, `role` non-unique).
    fn desc() -> &'static str {
        r#"{"users":{"idField":"id","fields":[
            {"name":"id","type":"id"},
            {"name":"email","type":"string","email":true},
            {"name":"role","type":"enum","variants":["admin","user"]},
            {"name":"age","type":"number","int":true,"min":0,"optional":true}],
            "indexes":[{"field":"email","unique":true},{"field":"role","unique":false}]}}"#
    }
410
411 #[test]
412 fn compiles_valid_descriptor() {
413 let ir = SchemaIr::compile(desc()).unwrap();
414 let c = ir.collection("users").unwrap();
415 assert_eq!(c.id_field, "id");
416 assert_eq!(c.fields.len(), 4);
417 assert_eq!(c.indexes.len(), 2);
418 }
419
420 #[test]
421 fn rejects_collection_without_id_field() {
422 let d = r#"{"c":{"idField":"id","fields":[{"name":"x","type":"string"}],"indexes":[]}}"#;
423 let e = SchemaIr::compile(d).unwrap_err();
424 assert_eq!(e.kind(), crate::error::NookErrorKind::Schema);
425 }
426
427 #[test]
428 fn rejects_index_on_optional_field() {
429 let d = r#"{"c":{"idField":"id","fields":[
430 {"name":"id","type":"id"},{"name":"x","type":"string","optional":true}],
431 "indexes":[{"field":"x","unique":false}]}}"#;
432 let e = SchemaIr::compile(d).unwrap_err();
433 assert_eq!(e.kind(), crate::error::NookErrorKind::Schema);
434 }
435
436 #[test]
437 fn rejects_reserved_meta_collection_name() {
438 let d = r#"{"_meta":{"idField":"id","fields":[{"name":"id","type":"id"}],"indexes":[]}}"#;
439 let e = SchemaIr::compile(d).unwrap_err();
440 assert_eq!(e.kind(), crate::error::NookErrorKind::Schema);
441 }
442
443 #[allow(clippy::too_many_lines)]
444 #[test]
445 fn schema_hash_diverges_on_constraint_only_diff() {
446 fn h(d: &str) -> [u8; 32] {
451 SchemaIr::compile(d).unwrap().schema_hash()
452 }
453
454 assert_ne!(
456 h(r#"{"c":{"idField":"id","fields":[
457 {"name":"id","type":"id"},
458 {"name":"x","type":"number","min":1}],"indexes":[]}}"#),
459 h(r#"{"c":{"idField":"id","fields":[
460 {"name":"id","type":"id"},
461 {"name":"x","type":"number","min":2}],"indexes":[]}}"#),
462 "min must affect schema_hash",
463 );
464
465 assert_ne!(
467 h(r#"{"c":{"idField":"id","fields":[
468 {"name":"id","type":"id"},
469 {"name":"x","type":"number","max":100}],"indexes":[]}}"#),
470 h(r#"{"c":{"idField":"id","fields":[
471 {"name":"id","type":"id"},
472 {"name":"x","type":"number","max":200}],"indexes":[]}}"#),
473 "max must affect schema_hash",
474 );
475
476 assert_ne!(
478 h(r#"{"c":{"idField":"id","fields":[
479 {"name":"id","type":"id"},
480 {"name":"x","type":"number"}],"indexes":[]}}"#),
481 h(r#"{"c":{"idField":"id","fields":[
482 {"name":"id","type":"id"},
483 {"name":"x","type":"number","min":0}],"indexes":[]}}"#),
484 "Some(min) vs None must affect schema_hash",
485 );
486
487 assert_ne!(
489 h(r#"{"c":{"idField":"id","fields":[
490 {"name":"id","type":"id"},
491 {"name":"x","type":"number"}],"indexes":[]}}"#),
492 h(r#"{"c":{"idField":"id","fields":[
493 {"name":"id","type":"id"},
494 {"name":"x","type":"number","max":0}],"indexes":[]}}"#),
495 "Some(max) vs None must affect schema_hash",
496 );
497
498 assert_ne!(
500 h(r#"{"c":{"idField":"id","fields":[
501 {"name":"id","type":"id"},
502 {"name":"x","type":"number","int":true}],"indexes":[]}}"#),
503 h(r#"{"c":{"idField":"id","fields":[
504 {"name":"id","type":"id"},
505 {"name":"x","type":"number","int":false}],"indexes":[]}}"#),
506 "int must affect schema_hash",
507 );
508
509 assert_ne!(
511 h(r#"{"c":{"idField":"id","fields":[
512 {"name":"id","type":"id"},
513 {"name":"x","type":"string","email":true}],"indexes":[]}}"#),
514 h(r#"{"c":{"idField":"id","fields":[
515 {"name":"id","type":"id"},
516 {"name":"x","type":"string","email":false}],"indexes":[]}}"#),
517 "email must affect schema_hash",
518 );
519
520 assert_ne!(
522 h(r#"{"c":{"idField":"id","fields":[
523 {"name":"id","type":"id"},
524 {"name":"r","type":"enum","variants":["a","b"]}],"indexes":[]}}"#),
525 h(r#"{"c":{"idField":"id","fields":[
526 {"name":"id","type":"id"},
527 {"name":"r","type":"enum","variants":["a","b","c"]}],"indexes":[]}}"#),
528 "variants set must affect schema_hash",
529 );
530
531 assert_ne!(
533 h(r#"{"c":{"idField":"id","fields":[
534 {"name":"id","type":"id"},
535 {"name":"r","type":"enum","variants":["a","b"]}],"indexes":[]}}"#),
536 h(r#"{"c":{"idField":"id","fields":[
537 {"name":"id","type":"id"},
538 {"name":"r","type":"enum","variants":["b","a"]}],"indexes":[]}}"#),
539 "variants order must affect schema_hash (declared order is part of the schema)",
540 );
541
542 assert_ne!(
544 h(r#"{"c":{"idField":"id","fields":[
545 {"name":"id","type":"id"},
546 {"name":"x","type":"string","optional":true}],"indexes":[]}}"#),
547 h(r#"{"c":{"idField":"id","fields":[
548 {"name":"id","type":"id"},
549 {"name":"x","type":"string","optional":false}],"indexes":[]}}"#),
550 "optional must affect schema_hash",
551 );
552
553 assert_ne!(
555 h(r#"{"c":{"idField":"id","fields":[
556 {"name":"id","type":"id"},
557 {"name":"x","type":"string","nullable":true}],"indexes":[]}}"#),
558 h(r#"{"c":{"idField":"id","fields":[
559 {"name":"id","type":"id"},
560 {"name":"x","type":"string","nullable":false}],"indexes":[]}}"#),
561 "nullable must affect schema_hash",
562 );
563
564 assert_ne!(
566 h(r#"{"c":{"idField":"id","fields":[
567 {"name":"id","type":"id"},
568 {"name":"x","type":"string"}],
569 "indexes":[{"field":"x","unique":true}]}}"#),
570 h(r#"{"c":{"idField":"id","fields":[
571 {"name":"id","type":"id"},
572 {"name":"x","type":"string"}],
573 "indexes":[{"field":"x","unique":false}]}}"#),
574 "index unique must affect schema_hash",
575 );
576 }
577
578 #[test]
579 fn schema_hash_is_stable_and_order_independent() {
580 let h1: [u8; 32] = SchemaIr::compile(desc()).unwrap().schema_hash();
581 let h2: [u8; 32] = SchemaIr::compile(desc()).unwrap().schema_hash();
582 assert_eq!(h1, h2);
583 assert!(h1.iter().any(|&b| b != 0));
584
585 let a = r#"{"alpha":{"idField":"id","fields":[{"name":"id","type":"id"}],"indexes":[]},
588 "beta":{"idField":"id","fields":[{"name":"id","type":"id"}],"indexes":[]}}"#;
589 let b = r#"{"beta":{"idField":"id","fields":[{"name":"id","type":"id"}],"indexes":[]},
590 "alpha":{"idField":"id","fields":[{"name":"id","type":"id"}],"indexes":[]}}"#;
591 assert_eq!(
592 SchemaIr::compile(a).unwrap().schema_hash(),
593 SchemaIr::compile(b).unwrap().schema_hash(),
594 "schema_hash must be independent of top-level collection JSON key order",
595 );
596 }
597
598 #[test]
599 fn array_field_type_compiles_and_distinguishes_inner_type() {
600 let str_arr = SchemaIr::compile(
601 r#"{"c":{"idField":"id","fields":[
602 {"name":"id","type":"id"},
603 {"name":"tags","type":"array","items":{"type":"string"}}],
604 "indexes":[]}}"#,
605 )
606 .unwrap();
607
608 let num_arr = SchemaIr::compile(
609 r#"{"c":{"idField":"id","fields":[
610 {"name":"id","type":"id"},
611 {"name":"tags","type":"array","items":{"type":"number"}}],
612 "indexes":[]}}"#,
613 )
614 .unwrap();
615
616 assert_ne!(
617 str_arr.schema_hash(),
618 num_arr.schema_hash(),
619 "Array(String) and Array(Number) must produce different hashes",
620 );
621
622 let c = str_arr.collection("c").unwrap();
623 let tags_field = c.field("tags").unwrap();
624 match &tags_field.ty {
625 FieldType::Array(inner) => assert!(matches!(**inner, FieldType::String)),
626 other => panic!("expected Array(String), got {other:?}"),
627 }
628 }
629
630 #[test]
631 fn schema_hash_returns_32_bytes() {
632 let d = r#"{"u":{"idField":"id","fields":[{"name":"id","type":"id"}],"indexes":[]}}"#;
637 let h: [u8; 32] = SchemaIr::compile(d).unwrap().schema_hash();
638 assert_eq!(h.len(), 32);
639 assert!(
641 h.iter().any(|&b| b != 0),
642 "schema_hash must not be all zeros"
643 );
644 }
645}