hydrate_schema/schema_def/
schema_linker.rs

1use super::enum_type_builder::*;
2use super::record_type_builder::*;
3use super::schema_def::*;
4use crate::{HashMap, HashSet, SchemaDefParserError, SchemaFingerprint, SchemaNamedType};
5use siphasher::sip128::Hasher128;
6use std::error::Error;
7use std::fmt::{Display, Formatter};
8use std::hash::Hash;
9use std::path::Path;
10use uuid::Uuid;
11
12#[derive(Debug)]
13pub enum SchemaLinkerError {
14    Str(&'static str),
15    String(String),
16    ValidationError(SchemaDefValidationError),
17}
18
19impl Display for SchemaLinkerError {
20    fn fmt(
21        &self,
22        f: &mut Formatter<'_>,
23    ) -> std::fmt::Result {
24        match self {
25            SchemaLinkerError::ValidationError(e) => write!(f, "Error linking schema: {}", e),
26            _ => write!(f, "Error linking schema: {:?}", self),
27        }
28    }
29}
30
31impl Error for SchemaLinkerError {}
32
33impl From<SchemaDefParserError> for SchemaLinkerError {
34    fn from(err: SchemaDefParserError) -> Self {
35        match err {
36            SchemaDefParserError::Str(x) => SchemaLinkerError::Str(x),
37            SchemaDefParserError::String(x) => SchemaLinkerError::String(x),
38            SchemaDefParserError::ValidationError(x) => SchemaLinkerError::ValidationError(x),
39        }
40    }
41}
42
43impl From<SchemaDefValidationError> for SchemaLinkerError {
44    fn from(err: SchemaDefValidationError) -> Self {
45        SchemaLinkerError::ValidationError(err)
46    }
47}
48
49pub type SchemaLinkerResult<T> = Result<T, SchemaLinkerError>;
50
51/// Acccumulates schema definitions defined in code or by json. Once schemas have been loaded, they
52/// are "linked", producing read-only schemas that are hashed and may cyclically reference each
53/// other. The individual schemas are also very cheap to clone as they are stored in Arc<T>s.
54#[derive(Default)]
55pub struct SchemaLinker {
56    types: HashMap<String, SchemaDefNamedType>,
57    type_aliases: HashMap<String, String>,
58    //records: Vec<SchemaFromFileRecord>,
59    // enums
60    // fixed
61    // union?
62}
63
64impl SchemaLinker {
65    pub fn unlinked_type_names(&self) -> Vec<String> {
66        self.types.keys().cloned().collect()
67    }
68
69    fn add_named_type(
70        &mut self,
71        named_type: SchemaDefNamedType,
72    ) -> SchemaLinkerResult<()> {
73        log::trace!("Adding type {}", named_type.type_name());
74        if self.types.contains_key(named_type.type_name()) {
75            Err(SchemaLinkerError::String(format!(
76                "Type name {} has already been used",
77                named_type.type_name()
78            )))?;
79        }
80
81        if self.type_aliases.contains_key(named_type.type_name()) {
82            Err(SchemaLinkerError::String(format!(
83                "Type name {} has already been used",
84                named_type.type_name()
85            )))?;
86        }
87
88        for alias in named_type.aliases() {
89            if self.types.contains_key(alias) {
90                Err(SchemaLinkerError::String(format!(
91                    "Type name {} has already been used",
92                    alias
93                )))?;
94            }
95
96            if self.type_aliases.contains_key(alias) {
97                Err(SchemaLinkerError::String(format!(
98                    "Type name {} has already been used",
99                    alias
100                )))?;
101            }
102        }
103
104        for alias in named_type.aliases() {
105            self.type_aliases
106                .insert(alias.to_string(), named_type.type_name().to_string());
107        }
108        //let schema_def = SchemaDefType::NamedType(named_type.type_name().to_string());
109        self.types
110            .insert(named_type.type_name().to_string(), named_type);
111        Ok(())
112    }
113
114    pub fn add_source_dir<PathT: AsRef<Path>, PatternT: AsRef<str>>(
115        &mut self,
116        path: PathT,
117        pattern: PatternT,
118    ) -> SchemaLinkerResult<()> {
119        log::info!(
120            "Adding schema source dir {:?} with pattern {:?}",
121            path.as_ref(),
122            pattern.as_ref()
123        );
124        let walker = globwalk::GlobWalkerBuilder::new(path.as_ref(), pattern.as_ref())
125            .file_type(globwalk::FileType::FILE)
126            .build()
127            .unwrap();
128
129        for file in walker {
130            let file = file.unwrap();
131            log::trace!("Parsing schema file {}", file.path().display());
132            let schema_str = std::fs::read_to_string(file.path()).unwrap();
133            let json_value: serde_json::Value = {
134                profiling::scope!("serde_json::from_str");
135                serde_json::from_str(&schema_str).unwrap()
136            };
137            //println!("VALUE {:#?}", value);
138
139            let json_objects = json_value.as_array().ok_or_else(|| {
140                SchemaLinkerError::Str("Schema file must be an array of json objects")
141            })?;
142
143            let base_path = dunce::canonicalize(file.path()).unwrap();
144
145            for json_object in json_objects {
146                let named_type = super::json_schema::parse_json_schema_def(
147                    &json_object,
148                    &format!("[{}]", file.path().display()),
149                    &base_path,
150                )?;
151
152                self.add_named_type(named_type)?;
153            }
154        }
155
156        Ok(())
157    }
158
159    pub fn register_record_type<F: Fn(&mut RecordTypeBuilder)>(
160        &mut self,
161        name: impl Into<String>,
162        type_uuid: Uuid,
163        f: F,
164    ) -> SchemaLinkerResult<()> {
165        let mut builder = RecordTypeBuilder::default();
166        (f)(&mut builder);
167
168        let mut fields = Vec::with_capacity(builder.fields.len());
169        for builder_field in builder.fields {
170            fields.push(SchemaDefRecordField::new(
171                builder_field.name,
172                builder_field.field_uuid,
173                builder_field.aliases,
174                builder_field.field_type,
175                builder_field.markup,
176            )?);
177        }
178
179        let name = name.into();
180        let schema_record = SchemaDefRecord::new(
181            name.clone(),
182            type_uuid,
183            builder.aliases,
184            fields,
185            builder.markup,
186        )?;
187        let named_type = SchemaDefNamedType::Record(schema_record);
188        self.add_named_type(named_type)
189    }
190
191    pub fn register_enum_type<F: Fn(&mut EnumTypeBuilder)>(
192        &mut self,
193        name: impl Into<String>,
194        type_uuid: Uuid,
195        f: F,
196    ) -> SchemaLinkerResult<()> {
197        let mut builder = EnumTypeBuilder::default();
198        (f)(&mut builder);
199
200        let mut symbols = Vec::with_capacity(builder.symbols.len());
201        for builder_field in builder.symbols {
202            symbols.push(SchemaDefEnumSymbol::new(
203                builder_field.name,
204                builder_field.symbol_uuid,
205                builder_field.aliases,
206            )?);
207        }
208
209        symbols.sort_by(|a, b| a.symbol_name.cmp(&b.symbol_name));
210
211        let name = name.into();
212        let schema_enum = SchemaDefEnum::new(name.clone(), type_uuid, builder.aliases, symbols)?;
213
214        let named_type = SchemaDefNamedType::Enum(schema_enum);
215        self.add_named_type(named_type)
216    }
217
218    fn validate_schema(
219        schema_being_validated: &str,
220        schema: &SchemaDefType,
221        named_types: &HashMap<String, SchemaDefNamedType>,
222        validated_types: &mut HashSet<String>,
223    ) -> Result<(), SchemaDefValidationError> {
224        match schema {
225            // For nullables we just need to make sure their inner type is validated
226            SchemaDefType::Nullable(def) => {
227                Self::validate_schema(schema_being_validated, &*def, named_types, validated_types)
228            }
229            // These value types don't need any validation
230            SchemaDefType::Boolean => Ok(()),
231            SchemaDefType::I32 => Ok(()),
232            SchemaDefType::I64 => Ok(()),
233            SchemaDefType::U32 => Ok(()),
234            SchemaDefType::U64 => Ok(()),
235            SchemaDefType::F32 => Ok(()),
236            SchemaDefType::F64 => Ok(()),
237            SchemaDefType::Bytes => Ok(()),
238            SchemaDefType::String => Ok(()),
239            // For arrays we just need to make sure their inner type is validated
240            SchemaDefType::StaticArray(def) => Self::validate_schema(
241                schema_being_validated,
242                &*def.item_type,
243                named_types,
244                validated_types,
245            ),
246            SchemaDefType::DynamicArray(def) => Self::validate_schema(
247                schema_being_validated,
248                &*def.item_type,
249                named_types,
250                validated_types,
251            ),
252            // For maps we need to validate the key/value types, and that the key type is allowed to be used as a key
253            SchemaDefType::Map(def) => {
254                // If we update this, update the similar logic in parse_json_schema_type_ref()
255                match &*def.key_type {
256                    SchemaDefType::Boolean
257                    | SchemaDefType::I32
258                    | SchemaDefType::I64
259                    | SchemaDefType::U32
260                    | SchemaDefType::U64
261                    | SchemaDefType::String
262                    | SchemaDefType::AssetRef(_) => {
263                        // valid keys
264                        Ok(())
265                    }
266                    // Invalid schema, we don't support these types as keys
267                    SchemaDefType::Nullable(_) => Err(SchemaDefValidationError::InvalidMapKeyType(
268                        schema_being_validated.to_string(),
269                        "Nullable".to_string(),
270                    )),
271                    SchemaDefType::F32 => Err(SchemaDefValidationError::InvalidMapKeyType(
272                        schema_being_validated.to_string(),
273                        "F32".to_string(),
274                    )),
275                    SchemaDefType::F64 => Err(SchemaDefValidationError::InvalidMapKeyType(
276                        schema_being_validated.to_string(),
277                        "F64".to_string(),
278                    )),
279                    SchemaDefType::Bytes => Err(SchemaDefValidationError::InvalidMapKeyType(
280                        schema_being_validated.to_string(),
281                        "Bytes".to_string(),
282                    )),
283                    SchemaDefType::StaticArray(_) => {
284                        Err(SchemaDefValidationError::InvalidMapKeyType(
285                            schema_being_validated.to_string(),
286                            "StaticArray".to_string(),
287                        ))
288                    }
289                    SchemaDefType::DynamicArray(_) => {
290                        Err(SchemaDefValidationError::InvalidMapKeyType(
291                            schema_being_validated.to_string(),
292                            "DynamicArray".to_string(),
293                        ))
294                    }
295                    SchemaDefType::Map(_) => Err(SchemaDefValidationError::InvalidMapKeyType(
296                        schema_being_validated.to_string(),
297                        "Map".to_string(),
298                    )),
299                    SchemaDefType::NamedType(key_named_type) => {
300                        match named_types.get(key_named_type) {
301                            Some(SchemaDefNamedType::Record(_)) => {
302                                // Records are not valid map key types
303                                Err(SchemaDefValidationError::InvalidMapKeyType(
304                                    schema_being_validated.to_string(),
305                                    key_named_type.to_string(),
306                                ))
307                            }
308                            Some(SchemaDefNamedType::Enum(_)) => {
309                                // Enums are ok as map key types
310                                Ok(())
311                            }
312                            None => {
313                                // Could not find the referenced named type
314                                Err(SchemaDefValidationError::ReferencedNamedTypeNotFound(
315                                    schema_being_validated.to_string(),
316                                    key_named_type.to_string(),
317                                ))
318                            }
319                        }
320                    }
321                }?;
322                Self::validate_schema(
323                    schema_being_validated,
324                    &*def.value_type,
325                    named_types,
326                    validated_types,
327                )?;
328                Self::validate_schema(
329                    schema_being_validated,
330                    &*def.value_type,
331                    named_types,
332                    validated_types,
333                )?;
334                Ok(())
335            }
336            // For assets we verify they point at a record
337            SchemaDefType::AssetRef(def) => {
338                match named_types.get(def) {
339                    Some(SchemaDefNamedType::Record(_)) => {
340                        // Asset ref points to a record in the named_types map, we're good
341                        Ok(())
342                    }
343                    Some(SchemaDefNamedType::Enum(_)) => {
344                        // Asset refs can't point at enums
345                        Err(SchemaDefValidationError::InvalidAssetRefInnerType(
346                            schema_being_validated.to_string(),
347                            def.to_string(),
348                        ))
349                    }
350                    None => Err(SchemaDefValidationError::ReferencedNamedTypeNotFound(
351                        schema_being_validated.to_string(),
352                        def.to_string(),
353                    )),
354                }
355            }
356            // For named types, we validate the fields. However, we need to handle cyclical references between types
357            SchemaDefType::NamedType(type_name) => {
358                // Handle cyclical type references
359                if validated_types.contains(type_name) {
360                    return Ok(());
361                }
362                validated_types.insert(type_name.clone());
363
364                match named_types.get(type_name) {
365                    Some(SchemaDefNamedType::Record(def)) => {
366                        // Validate field types
367                        for field_def in def.fields() {
368                            Self::validate_schema(
369                                schema_being_validated,
370                                &field_def.field_type,
371                                named_types,
372                                validated_types,
373                            )?;
374                        }
375                        Ok(())
376                    }
377                    Some(SchemaDefNamedType::Enum(_)) => Ok(()),
378                    None => Err(SchemaDefValidationError::ReferencedNamedTypeNotFound(
379                        schema_being_validated.to_string(),
380                        type_name.to_string(),
381                    )),
382                }
383            }
384        }
385    }
386
387    pub fn link_schemas(mut self) -> SchemaLinkerResult<LinkedSchemas> {
388        // Apply aliases
389        for (_, named_type) in &mut self.types {
390            named_type.apply_type_aliases(&self.type_aliases);
391        }
392
393        let mut validated_types = Default::default();
394        for (schema_name, named_type) in &self.types {
395            Self::validate_schema(
396                schema_name,
397                &SchemaDefType::NamedType(named_type.type_name().to_string()),
398                &self.types,
399                &mut validated_types,
400            )
401            .map_err(|err| SchemaLinkerError::ValidationError(err))?;
402        }
403
404        let mut partial_hashes = HashMap::default();
405        for (type_name, named_type) in &self.types {
406            let mut hasher = siphasher::sip128::SipHasher::default();
407            //println!("partial hash {}", named_type.type_name());
408            named_type.partial_hash(&mut hasher);
409            let partial_fingerprint = hasher.finish128().as_u128();
410            partial_hashes.insert(type_name, partial_fingerprint);
411        }
412
413        let mut schemas_by_type_uuid: HashMap<Uuid, SchemaFingerprint> = Default::default();
414        let mut schemas_by_name: HashMap<String, SchemaFingerprint> = Default::default();
415        let mut schemas: HashMap<SchemaFingerprint, SchemaNamedType> = Default::default();
416
417        // Hash each thing
418        for (type_name, named_type) in &self.types {
419            let mut related_types = HashSet::default();
420            related_types.insert(type_name.clone());
421
422            loop {
423                // We make a copy because otherwise we would be iterating the HashSet while we are appending to it
424                let before_copy: Vec<_> = related_types.iter().cloned().collect();
425                for related_type in &before_copy {
426                    // If you hit this unwrap, a schema file is likely referencing a type that does not exist
427                    // Keep in mind it's case-sensitive
428                    let Some(related_type) = self.types.get(related_type) else {
429                        panic!("Type named {} was referenced but undefined", related_type);
430                    };
431                    related_type.collect_all_related_types(&mut related_types);
432                }
433
434                if before_copy.len() == related_types.len() {
435                    break;
436                }
437            }
438
439            named_type.collect_all_related_types(&mut related_types);
440
441            let mut related_types: Vec<_> = related_types.into_iter().collect();
442            related_types.sort();
443
444            let mut hasher = siphasher::sip128::SipHasher::default();
445            for related_type in &related_types {
446                //let related_type = self.types.get(related_type);
447                let partial_hash = partial_hashes.get(related_type).unwrap();
448                partial_hash.hash(&mut hasher);
449            }
450            let fingerprint = SchemaFingerprint(hasher.finish128().as_u128());
451
452            // log::debug!(
453            //     "type {} fingerprint is {}",
454            //     type_name,
455            //     fingerprint.as_uuid()
456            // );
457            schemas_by_type_uuid.insert(named_type.type_uuid(), fingerprint);
458            schemas_by_name.insert(type_name.to_string(), fingerprint);
459        }
460
461        for (_type_name, named_type) in &self.types {
462            let fingerprint = schemas_by_name.get(named_type.type_name()).unwrap();
463            let schema = named_type.to_schema(&self.types, &schemas_by_name);
464            schemas.insert(*fingerprint, schema);
465        }
466
467        Ok(LinkedSchemas {
468            schemas_by_type_uuid,
469            schemas_by_name,
470            schemas,
471        })
472    }
473}
474
475pub struct LinkedSchemas {
476    pub schemas_by_type_uuid: HashMap<Uuid, SchemaFingerprint>,
477    pub schemas_by_name: HashMap<String, SchemaFingerprint>,
478    pub schemas: HashMap<SchemaFingerprint, SchemaNamedType>,
479}