// statevec_model/idl.rs

1// Copyright 2026 Jumpex Technology.
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::model::FieldType;
5use crate::registry::{SchemaFingerprint, SchemaRegistry};
6use crate::{
7    CommandDefinition, EnumDefinition, EnumVariantDefinition, EventDefinition, FieldDefinition,
8    PayloadFieldDefinition, RecordDefinition, Version,
9};
10use serde::Deserialize;
11use std::fmt;
12
/// Minimal pretty-printing JSON emitter used to render the schema IDL.
///
/// The writer only tracks the nesting depth. Callers are responsible for
/// pairing `begin_*`/`end_*` calls and for passing the right `first` flag so
/// that commas are placed between siblings.
struct JsonWriter {
    /// Output accumulated so far.
    buf: String,
    /// Current nesting depth; every level is rendered as two spaces.
    indent: usize,
}

impl JsonWriter {
    /// Creates an empty writer at nesting depth zero.
    fn new() -> Self {
        Self {
            buf: String::with_capacity(4096),
            indent: 0,
        }
    }

    /// Returns the whitespace prefix for the current nesting depth.
    fn indent_str(&self) -> String {
        "  ".repeat(self.indent)
    }

    /// Starts a new output line indented to the current nesting depth.
    fn newline(&mut self) {
        let pad = self.indent_str();
        self.buf.push('\n');
        self.buf.push_str(&pad);
    }

    /// Opens an object and increases the nesting depth.
    fn begin_object(&mut self) {
        self.buf.push('{');
        self.indent += 1;
    }

    /// Closes the innermost object on its own line.
    fn end_object(&mut self) {
        self.indent -= 1;
        self.newline();
        self.buf.push('}');
    }

    /// Opens an array and increases the nesting depth.
    fn begin_array(&mut self) {
        self.buf.push('[');
        self.indent += 1;
    }

    /// Closes the innermost array on its own line.
    fn end_array(&mut self) {
        self.indent -= 1;
        self.newline();
        self.buf.push(']');
    }

    /// Writes an object key followed by `": "`.
    ///
    /// `first` must be `true` for the first key of an object; every later key
    /// is preceded by a comma. The key goes through the same escaping as
    /// string values, so a name containing `"` or `\` cannot break the
    /// document.
    fn key(&mut self, name: &str, first: bool) {
        if !first {
            self.buf.push(',');
        }
        self.newline();
        self.write_str(name);
        self.buf.push_str(": ");
    }

    /// Positions the writer for the next array element.
    ///
    /// `first` must be `true` for the first element; every later element is
    /// preceded by a comma.
    fn array_sep(&mut self, first: bool) {
        if !first {
            self.buf.push(',');
        }
        self.newline();
    }

    /// Writes `s` as a quoted JSON string.
    ///
    /// Quotes, backslashes, newline, carriage return and tab use their short
    /// escapes; any other control character is written as `\uXXXX`.
    fn write_str(&mut self, s: &str) {
        self.buf.push('"');
        for ch in s.chars() {
            match ch {
                '"' => self.buf.push_str("\\\""),
                '\\' => self.buf.push_str("\\\\"),
                '\n' => self.buf.push_str("\\n"),
                '\r' => self.buf.push_str("\\r"),
                '\t' => self.buf.push_str("\\t"),
                c if c.is_control() => {
                    use std::fmt::Write;
                    // Writing into a `String` cannot fail.
                    let _ = write!(self.buf, "\\u{:04x}", c as u32);
                }
                c => self.buf.push(c),
            }
        }
        self.buf.push('"');
    }

    /// Writes an unsigned integer as a JSON number.
    fn write_u32(&mut self, v: u32) {
        use std::fmt::Write;
        // Writing into a `String` cannot fail.
        let _ = write!(self.buf, "{v}");
    }

    /// Writes a JSON boolean literal.
    fn write_bool(&mut self, v: bool) {
        self.buf.push_str(if v { "true" } else { "false" });
    }

    /// Consumes the writer and returns the accumulated text.
    fn finish(self) -> String {
        self.buf
    }
}
104
/// Renders a 16-byte value as 32 lowercase hexadecimal characters.
fn hex16(bytes: &[u8; 16]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut text = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        text.push(char::from(DIGITS[usize::from(byte >> 4)]));
        text.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    text
}
113
114fn field_type_json(
115    w: &mut JsonWriter,
116    ty: FieldType,
117    rust_type_name: &str,
118    enum_type_name: Option<&str>,
119    fixed_size: Option<u32>,
120) {
121    match ty {
122        FieldType::Bool => w.write_str("bool"),
123        FieldType::U8 => w.write_str("u8"),
124        FieldType::U16 => w.write_str("u16"),
125        FieldType::U32 => w.write_str("u32"),
126        FieldType::U64 => w.write_str("u64"),
127        FieldType::I32 => w.write_str("i32"),
128        FieldType::I64 => w.write_str("i64"),
129        FieldType::U128 => w.write_str("u128"),
130        FieldType::FixedBytes => {
131            if let Some(n) = fixed_size {
132                // For records: fixed_size is the total allocation (2 + N), so capacity = N - 2
133                // For payloads: fixed_size is already 2 + N
134                // We parse from rust_type_name to get N directly
135                let capacity =
136                    parse_fixed_bytes_capacity(rust_type_name).unwrap_or(n.saturating_sub(2));
137                w.buf.push_str(&format!("{{\"fixedBytes\": {}}}", capacity));
138            } else {
139                // Fallback: parse from rust_type_name
140                if let Some(n) = parse_fixed_bytes_capacity(rust_type_name) {
141                    w.buf.push_str(&format!("{{\"fixedBytes\": {}}}", n));
142                } else {
143                    w.write_str("fixedBytes");
144                }
145            }
146        }
147        FieldType::VarBytes => w.write_str("varBytes"),
148        FieldType::EnumU8 => {
149            if let Some(name) = enum_type_name {
150                w.buf.push_str(&format!("{{\"defined\": \"{}\"}}", name));
151            } else {
152                w.write_str("enumU8");
153            }
154        }
155    }
156}
157
/// Extracts `N` from a type name of the form `FixedBytes<N>`.
///
/// Whitespace around the whole name, before `<`, and around `N` is ignored
/// (for example `"FixedBytes < 16 >"`). Returns `None` for any other shape or
/// when `N` does not parse as a `u32`.
#[doc(hidden)]
pub(crate) fn parse_fixed_bytes_capacity(rust_type_name: &str) -> Option<u32> {
    rust_type_name
        .trim()
        .strip_prefix("FixedBytes")
        .map(str::trim)
        .and_then(|generics| generics.strip_prefix('<'))
        .and_then(|rest| rest.strip_suffix('>'))
        .and_then(|digits| digits.trim().parse::<u32>().ok())
}
166
/// Failure modes of decoding or validating schema IDL JSON.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SchemaIdlError {
    /// The input byte slice is not valid UTF-8.
    InvalidUtf8,
    /// The JSON parser rejected the text; carries the parser's message.
    Json(String),
    /// The document declares an IDL version this crate does not handle;
    /// carries the declared version.
    UnsupportedIdlVersion(String),
    /// The JSON is well-formed but describes an invalid schema; carries the
    /// full human-readable message.
    InvalidSchema(String),
}

impl fmt::Display for SchemaIdlError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidUtf8 => write!(f, "schema IDL bytes are not valid UTF-8"),
            Self::Json(detail) => {
                f.write_str("failed to parse schema IDL JSON: ")?;
                f.write_str(detail)
            }
            Self::UnsupportedIdlVersion(found) => {
                f.write_str("unsupported schema IDL version: ")?;
                f.write_str(found)
            }
            // The message is already complete; print it as-is.
            Self::InvalidSchema(detail) => f.write_str(detail),
        }
    }
}

impl std::error::Error for SchemaIdlError {}
194
195#[derive(Debug, Deserialize)]
196#[serde(rename_all = "camelCase")]
197struct IdlDocument {
198    idl_version: String,
199    schema_version: IdlVersion,
200    fingerprints: IdlFingerprints,
201    #[serde(default)]
202    types: Vec<IdlEnumDefinition>,
203    #[serde(default)]
204    records: Vec<IdlRecordDefinition>,
205    #[serde(default)]
206    commands: Vec<IdlPayloadDefinition>,
207    #[serde(default)]
208    events: Vec<IdlPayloadDefinition>,
209}
210
211#[derive(Debug, Deserialize)]
212struct IdlVersion {
213    main: u8,
214    minor: u8,
215}
216
217#[derive(Debug, Deserialize)]
218struct IdlFingerprints {
219    records: String,
220    commands: String,
221    events: String,
222    types: String,
223}
224
225#[derive(Debug, Deserialize)]
226struct IdlEnumDefinition {
227    name: String,
228    kind: String,
229    variants: Vec<IdlEnumVariantDefinition>,
230}
231
232#[derive(Debug, Deserialize)]
233struct IdlEnumVariantDefinition {
234    name: String,
235    value: u8,
236}
237
238#[derive(Debug, Deserialize)]
239#[serde(rename_all = "camelCase")]
240struct IdlRecordDefinition {
241    kind: u8,
242    name: String,
243    version: u16,
244    data_size: u32,
245    has_pk: bool,
246    #[serde(default)]
247    pk_fields: Vec<String>,
248    support_range_scan: bool,
249    #[serde(default)]
250    fields: Vec<IdlRecordFieldDefinition>,
251}
252
253#[derive(Debug, Deserialize)]
254struct IdlRecordFieldDefinition {
255    name: String,
256    index: u32,
257    #[serde(rename = "type")]
258    ty: IdlFieldType,
259    offset: u32,
260    size: u32,
261    immutable: bool,
262}
263
264#[derive(Debug, Deserialize)]
265struct IdlPayloadDefinition {
266    kind: u8,
267    name: String,
268    version: u16,
269    #[serde(default)]
270    fields: Vec<IdlPayloadFieldDefinition>,
271}
272
273#[derive(Debug, Deserialize)]
274struct IdlPayloadFieldDefinition {
275    name: String,
276    index: u32,
277    #[serde(rename = "type")]
278    ty: IdlFieldType,
279}
280
281#[derive(Debug, Deserialize)]
282#[serde(untagged)]
283enum IdlFieldType {
284    Primitive(String),
285    FixedBytes {
286        #[serde(rename = "fixedBytes")]
287        fixed_bytes: u32,
288    },
289    Defined {
290        defined: String,
291    },
292}
293
294struct DecodedFieldType {
295    ty: FieldType,
296    rust_type_name: &'static str,
297    enum_type_name: Option<&'static str>,
298    fixed_size: Option<u32>,
299}
300
301impl SchemaRegistry {
302    /// Parses a schema registry from UTF-8 IDL JSON bytes.
303    ///
304    /// Parsed names are promoted to `'static` storage so the returned registry
305    /// has the same lifetime shape as macro-generated registries. Repeatedly
306    /// parsing unbounded IDL documents leaks those promoted strings; do not use
307    /// this on a request path or hot reload loop.
308    pub fn from_idl_bytes(bytes: &[u8]) -> Result<Self, SchemaIdlError> {
309        let json = std::str::from_utf8(bytes).map_err(|_| SchemaIdlError::InvalidUtf8)?;
310        Self::from_idl_json(json)
311    }
312
313    /// Parses a schema registry from IDL JSON text.
314    ///
315    /// See [`SchemaRegistry::from_idl_bytes`] for the lifetime and leak
316    /// behavior of parsed names.
317    pub fn from_idl_json(json: &str) -> Result<Self, SchemaIdlError> {
318        let doc: IdlDocument =
319            serde_json::from_str(json).map_err(|err| SchemaIdlError::Json(err.to_string()))?;
320        if doc.idl_version != "1.0" {
321            return Err(SchemaIdlError::UnsupportedIdlVersion(doc.idl_version));
322        }
323
324        let enum_defs: Vec<EnumDefinition> = doc
325            .types
326            .into_iter()
327            .map(|def| {
328                if def.kind != "enumU8" {
329                    return Err(SchemaIdlError::InvalidSchema(format!(
330                        "unsupported type definition kind for {}: {}",
331                        def.name, def.kind
332                    )));
333                }
334                let name = leak_string(def.name);
335                let variants = def
336                    .variants
337                    .into_iter()
338                    .map(|variant| EnumVariantDefinition {
339                        name: leak_string(variant.name),
340                        discriminant: variant.value,
341                    })
342                    .collect::<Vec<_>>();
343                Ok(EnumDefinition {
344                    name,
345                    variants: leak_slice(variants),
346                })
347            })
348            .collect::<Result<_, _>>()?;
349
350        let record_defs: Vec<RecordDefinition> = doc
351            .records
352            .into_iter()
353            .map(|def| {
354                let pk_fields = def
355                    .pk_fields
356                    .into_iter()
357                    .map(leak_string)
358                    .collect::<Vec<_>>();
359                let is_pk_idx = def.has_pk || !pk_fields.is_empty();
360                let fields = def
361                    .fields
362                    .into_iter()
363                    .map(|field| {
364                        let decoded = decode_field_type(field.ty)?;
365                        Ok(FieldDefinition {
366                            name: leak_string(field.name),
367                            field_index: field.index,
368                            offset: field.offset,
369                            ty: decoded.ty,
370                            len: field.size,
371                            rust_type_name: decoded.rust_type_name,
372                            enum_type_name: decoded.enum_type_name,
373                            immutable: field.immutable,
374                        })
375                    })
376                    .collect::<Result<Vec<_>, SchemaIdlError>>()?;
377
378                Ok(RecordDefinition {
379                    kind: def.kind,
380                    name: leak_string(def.name),
381                    is_pk_idx,
382                    support_range_scan: def.support_range_scan,
383                    data_size: def.data_size,
384                    version: def.version,
385                    pk_encode: None,
386                    fields: leak_slice(fields),
387                    reserved_fields: &[],
388                    pk_fields: leak_slice(pk_fields),
389                })
390            })
391            .collect::<Result<_, _>>()?;
392
393        let command_defs: Vec<CommandDefinition> = doc
394            .commands
395            .into_iter()
396            .map(idl_payload_to_command_definition)
397            .collect::<Result<_, _>>()?;
398
399        let event_defs: Vec<EventDefinition> = doc
400            .events
401            .into_iter()
402            .map(idl_payload_to_event_definition)
403            .collect::<Result<_, _>>()?;
404
405        Ok(SchemaRegistry::new_with_fingerprints(
406            Version::new(doc.schema_version.main, doc.schema_version.minor),
407            &record_defs,
408            &command_defs,
409            &event_defs,
410            &enum_defs,
411            parse_fingerprint("records", &doc.fingerprints.records)?,
412            parse_fingerprint("commands", &doc.fingerprints.commands)?,
413            parse_fingerprint("events", &doc.fingerprints.events)?,
414            parse_fingerprint("types", &doc.fingerprints.types)?,
415        ))
416    }
417}
418
419fn idl_payload_to_command_definition(
420    def: IdlPayloadDefinition,
421) -> Result<CommandDefinition, SchemaIdlError> {
422    Ok(CommandDefinition {
423        kind: def.kind,
424        name: leak_string(def.name),
425        version: def.version,
426        fields: leak_slice(idl_payload_fields(def.fields)?),
427    })
428}
429
430fn idl_payload_to_event_definition(
431    def: IdlPayloadDefinition,
432) -> Result<EventDefinition, SchemaIdlError> {
433    Ok(EventDefinition {
434        kind: def.kind,
435        name: leak_string(def.name),
436        version: def.version,
437        fields: leak_slice(idl_payload_fields(def.fields)?),
438    })
439}
440
441fn idl_payload_fields(
442    fields: Vec<IdlPayloadFieldDefinition>,
443) -> Result<Vec<PayloadFieldDefinition>, SchemaIdlError> {
444    fields
445        .into_iter()
446        .map(|field| {
447            let decoded = decode_field_type(field.ty)?;
448            Ok(PayloadFieldDefinition {
449                name: leak_string(field.name),
450                field_index: field.index,
451                ty: decoded.ty,
452                rust_type_name: decoded.rust_type_name,
453                enum_type_name: decoded.enum_type_name,
454                fixed_size: decoded.fixed_size,
455            })
456        })
457        .collect()
458}
459
460fn decode_field_type(field_type: IdlFieldType) -> Result<DecodedFieldType, SchemaIdlError> {
461    match field_type {
462        IdlFieldType::Primitive(kind) => {
463            let (ty, fixed_size) = match kind.as_str() {
464                "bool" => (FieldType::Bool, Some(1)),
465                "u8" => (FieldType::U8, Some(1)),
466                "u16" => (FieldType::U16, Some(2)),
467                "u32" => (FieldType::U32, Some(4)),
468                "u64" => (FieldType::U64, Some(8)),
469                "i32" => (FieldType::I32, Some(4)),
470                "i64" => (FieldType::I64, Some(8)),
471                "u128" => (FieldType::U128, Some(16)),
472                "varBytes" => (FieldType::VarBytes, None),
473                "enumU8" => {
474                    return Err(SchemaIdlError::InvalidSchema(
475                        "enumU8 field is missing its defined type name".to_string(),
476                    ));
477                }
478                other => {
479                    return Err(SchemaIdlError::InvalidSchema(format!(
480                        "unsupported field type: {other}"
481                    )));
482                }
483            };
484            Ok(DecodedFieldType {
485                ty,
486                rust_type_name: leak_string(kind),
487                enum_type_name: None,
488                fixed_size,
489            })
490        }
491        IdlFieldType::FixedBytes { fixed_bytes } => Ok(DecodedFieldType {
492            ty: FieldType::FixedBytes,
493            rust_type_name: leak_string(format!("FixedBytes<{fixed_bytes}>")),
494            enum_type_name: None,
495            fixed_size: fixed_bytes.checked_add(2),
496        }),
497        IdlFieldType::Defined { defined } => {
498            let name = leak_string(defined.clone());
499            Ok(DecodedFieldType {
500                ty: FieldType::EnumU8,
501                rust_type_name: name,
502                enum_type_name: Some(name),
503                fixed_size: Some(1),
504            })
505        }
506    }
507}
508
509fn parse_fingerprint(
510    label: &'static str,
511    value: &str,
512) -> Result<SchemaFingerprint, SchemaIdlError> {
513    if value.len() != 32 {
514        return Err(SchemaIdlError::InvalidSchema(format!(
515            "{label} fingerprint must be 32 hex chars, got {}",
516            value.len()
517        )));
518    }
519    let mut out = [0u8; 16];
520    for (idx, chunk) in value.as_bytes().chunks_exact(2).enumerate() {
521        let pair = std::str::from_utf8(chunk).map_err(|_| {
522            SchemaIdlError::InvalidSchema(format!("{label} fingerprint contains non-utf8 bytes"))
523        })?;
524        out[idx] = u8::from_str_radix(pair, 16).map_err(|_| {
525            SchemaIdlError::InvalidSchema(format!(
526                "{label} fingerprint contains invalid hex: {value}"
527            ))
528        })?;
529    }
530    Ok(out)
531}
532
/// Promotes a string to `'static` storage by leaking its heap allocation.
///
/// The memory is never reclaimed.
fn leak_string(value: impl Into<String>) -> &'static str {
    let owned: String = value.into();
    let boxed: Box<str> = owned.into_boxed_str();
    Box::leak(boxed)
}
536
/// Promotes a vector's contents to a `'static` slice by leaking the
/// allocation.
///
/// The memory is never reclaimed.
fn leak_slice<T>(values: Vec<T>) -> &'static [T] {
    let boxed: Box<[T]> = values.into_boxed_slice();
    Box::leak(boxed)
}
540
541impl SchemaRegistry {
542    /// Serializes this schema registry to stable IDL JSON.
543    pub fn to_idl_json(&self) -> String {
544        let mut w = JsonWriter::new();
545        w.begin_object();
546
547        // idlVersion
548        w.key("idlVersion", true);
549        w.write_str("1.0");
550
551        // schemaVersion
552        w.key("schemaVersion", false);
553        w.begin_object();
554        w.key("main", true);
555        w.write_u32(self.schema_version().main() as u32);
556        w.key("minor", false);
557        w.write_u32(self.schema_version().minor() as u32);
558        w.end_object();
559
560        // fingerprints
561        w.key("fingerprints", false);
562        w.begin_object();
563        w.key("records", true);
564        w.write_str(&hex16(&self.record_schema_fingerprint()));
565        w.key("commands", false);
566        w.write_str(&hex16(&self.command_schema_fingerprint()));
567        w.key("events", false);
568        w.write_str(&hex16(&self.event_schema_fingerprint()));
569        w.key("types", false);
570        w.write_str(&hex16(&self.types_schema_fingerprint()));
571        w.end_object();
572
573        // types (enums)
574        w.key("types", false);
575        w.begin_array();
576        let mut first_type = true;
577        for def in self.enum_defs() {
578            w.array_sep(first_type);
579            first_type = false;
580            w.begin_object();
581            w.key("name", true);
582            w.write_str(def.name);
583            w.key("kind", false);
584            w.write_str("enumU8");
585            w.key("variants", false);
586            w.begin_array();
587            let mut first_v = true;
588            for v in def.variants {
589                w.array_sep(first_v);
590                first_v = false;
591                w.begin_object();
592                w.key("name", true);
593                w.write_str(v.name);
594                w.key("value", false);
595                w.write_u32(v.discriminant as u32);
596                w.end_object();
597            }
598            w.end_array();
599            w.end_object();
600        }
601        w.end_array();
602
603        // records
604        w.key("records", false);
605        w.begin_array();
606        let mut first_rec = true;
607        for def in self.record_defs() {
608            w.array_sep(first_rec);
609            first_rec = false;
610            w.begin_object();
611            w.key("kind", true);
612            w.write_u32(def.kind as u32);
613            w.key("name", false);
614            w.write_str(def.name);
615            w.key("version", false);
616            w.write_u32(def.version as u32);
617            w.key("dataSize", false);
618            w.write_u32(def.data_size);
619            w.key("hasPk", false);
620            w.write_bool(def.is_pk_idx);
621
622            w.key("pkFields", false);
623            w.begin_array();
624            let mut first_pk = true;
625            for pk in def.pk_fields {
626                w.array_sep(first_pk);
627                first_pk = false;
628                w.write_str(pk);
629            }
630            w.end_array();
631
632            w.key("supportRangeScan", false);
633            w.write_bool(def.support_range_scan);
634
635            w.key("fields", false);
636            w.begin_array();
637            let mut first_field = true;
638            for field in def.fields {
639                w.array_sep(first_field);
640                first_field = false;
641                w.begin_object();
642                w.key("name", true);
643                w.write_str(field.name);
644                w.key("index", false);
645                w.write_u32(field.field_index);
646                w.key("type", false);
647                // For record fields, fixed_size is field.len
648                field_type_json(
649                    &mut w,
650                    field.ty,
651                    field.rust_type_name,
652                    field.enum_type_name,
653                    Some(field.len),
654                );
655                w.key("offset", false);
656                w.write_u32(field.offset);
657                w.key("size", false);
658                w.write_u32(field.len);
659                w.key("immutable", false);
660                w.write_bool(field.immutable);
661                w.end_object();
662            }
663            w.end_array();
664            w.end_object();
665        }
666        w.end_array();
667
668        // commands
669        w.key("commands", false);
670        w.begin_array();
671        let mut first_cmd = true;
672        for def in self.command_defs() {
673            w.array_sep(first_cmd);
674            first_cmd = false;
675            w.begin_object();
676            w.key("kind", true);
677            w.write_u32(def.kind as u32);
678            w.key("name", false);
679            w.write_str(def.name);
680            w.key("version", false);
681            w.write_u32(def.version as u32);
682            w.key("fields", false);
683            w.begin_array();
684            let mut first_field = true;
685            for field in def.fields {
686                w.array_sep(first_field);
687                first_field = false;
688                w.begin_object();
689                w.key("name", true);
690                w.write_str(field.name);
691                w.key("index", false);
692                w.write_u32(field.field_index);
693                w.key("type", false);
694                field_type_json(
695                    &mut w,
696                    field.ty,
697                    field.rust_type_name,
698                    field.enum_type_name,
699                    field.fixed_size,
700                );
701                w.end_object();
702            }
703            w.end_array();
704            w.end_object();
705        }
706        w.end_array();
707
708        // events
709        w.key("events", false);
710        w.begin_array();
711        let mut first_evt = true;
712        for def in self.event_defs() {
713            w.array_sep(first_evt);
714            first_evt = false;
715            w.begin_object();
716            w.key("kind", true);
717            w.write_u32(def.kind as u32);
718            w.key("name", false);
719            w.write_str(def.name);
720            w.key("version", false);
721            w.write_u32(def.version as u32);
722            w.key("fields", false);
723            w.begin_array();
724            let mut first_field = true;
725            for field in def.fields {
726                w.array_sep(first_field);
727                first_field = false;
728                w.begin_object();
729                w.key("name", true);
730                w.write_str(field.name);
731                w.key("index", false);
732                w.write_u32(field.field_index);
733                w.key("type", false);
734                field_type_json(
735                    &mut w,
736                    field.ty,
737                    field.rust_type_name,
738                    field.enum_type_name,
739                    field.fixed_size,
740                );
741                w.end_object();
742            }
743            w.end_array();
744            w.end_object();
745        }
746        w.end_array();
747
748        w.end_object();
749        w.buf.push('\n');
750        w.finish()
751    }
752}