use super::enum_type_builder::*;
use super::record_type_builder::*;
use super::schema_def::*;
use crate::{HashMap, HashSet, SchemaDefParserError, SchemaFingerprint, SchemaNamedType};
use siphasher::sip128::Hasher128;
use std::error::Error;
use std::fmt::{Display, Formatter};
use std::hash::Hash;
use std::path::Path;
use uuid::Uuid;
/// Errors that can be returned while registering or linking schema definitions.
#[derive(Debug)]
pub enum SchemaLinkerError {
/// A static error message.
Str(&'static str),
/// An owned, usually formatted, error message.
String(String),
/// A schema definition failed validation; wraps the underlying validation error.
ValidationError(SchemaDefValidationError),
}
impl Display for SchemaLinkerError {
    /// Writes the error prefixed with `Error linking schema: `.
    ///
    /// A wrapped validation error is rendered through its own `Display`
    /// implementation; every other variant is rendered using the `Debug`
    /// representation of the whole value.
    fn fmt(
        &self,
        f: &mut Formatter<'_>,
    ) -> std::fmt::Result {
        if let SchemaLinkerError::ValidationError(inner) = self {
            return write!(f, "Error linking schema: {}", inner);
        }

        write!(f, "Error linking schema: {:?}", self)
    }
}
// Marker impl: relies on the default `std::error::Error` methods.
impl Error for SchemaLinkerError {}
impl From<SchemaDefParserError> for SchemaLinkerError {
    /// Maps each parser error variant onto the linker variant of the same name,
    /// carrying the payload across unchanged.
    fn from(parser_error: SchemaDefParserError) -> Self {
        match parser_error {
            SchemaDefParserError::Str(message) => Self::Str(message),
            SchemaDefParserError::String(message) => Self::String(message),
            SchemaDefParserError::ValidationError(inner) => Self::ValidationError(inner),
        }
    }
}
impl From<SchemaDefValidationError> for SchemaLinkerError {
fn from(err: SchemaDefValidationError) -> Self {
SchemaLinkerError::ValidationError(err)
}
}
/// Result alias whose error type is [`SchemaLinkerError`].
pub type SchemaLinkerResult<T> = Result<T, SchemaLinkerError>;
/// Collects named schema definitions and their aliases so they can later be
/// validated and linked together by `link_schemas`.
#[derive(Default)]
pub struct SchemaLinker {
// Registered definitions, keyed by their type name.
types: HashMap<String, SchemaDefNamedType>,
// Maps each registered alias to the type name it stands for.
type_aliases: HashMap<String, String>,
}
impl SchemaLinker {
/// Returns an owned copy of the name of every type registered so far.
///
/// The order follows the iteration order of the internal map.
pub fn unlinked_type_names(&self) -> Vec<String> {
    let mut names = Vec::with_capacity(self.types.len());
    names.extend(self.types.keys().cloned());
    names
}
/// Registers `named_type` under its type name and records all of its aliases.
///
/// # Errors
///
/// Returns [`SchemaLinkerError::String`] when the type name, or any of the
/// aliases, is already in use as either a type name or an alias. Nothing is
/// inserted in that case.
fn add_named_type(
    &mut self,
    named_type: SchemaDefNamedType,
) -> SchemaLinkerResult<()> {
    log::trace!("Adding type {}", named_type.type_name());

    // Type names and aliases share a single namespace, so every candidate
    // name is checked against both maps before anything is mutated.
    let known_types = &self.types;
    let known_aliases = &self.type_aliases;
    let ensure_unused = |candidate: &str| -> SchemaLinkerResult<()> {
        if known_types.contains_key(candidate) || known_aliases.contains_key(candidate) {
            return Err(SchemaLinkerError::String(format!(
                "Type name {} has already been used",
                candidate
            )));
        }
        Ok(())
    };

    ensure_unused(named_type.type_name())?;
    for alias in named_type.aliases() {
        ensure_unused(alias)?;
    }

    let canonical_name = named_type.type_name().to_string();
    for alias in named_type.aliases() {
        self.type_aliases
            .insert(alias.to_string(), canonical_name.clone());
    }
    self.types.insert(canonical_name, named_type);

    Ok(())
}
/// Loads every schema file under `path` that matches the glob `pattern` and
/// registers each type it defines.
///
/// Each matching file must contain a JSON array; every element of the array
/// is parsed as one named type definition and added to the linker.
///
/// # Errors
///
/// Returns an error (instead of panicking) when:
/// - the glob pattern cannot be built or the directory walk fails,
/// - a file cannot be read, is not valid JSON, or its path cannot be
///   canonicalized,
/// - the top-level JSON value is not an array,
/// - a definition fails to parse, or its name/aliases are already in use.
///
/// Types registered before the failing definition remain registered.
pub fn add_source_dir<PathT: AsRef<Path>, PatternT: AsRef<str>>(
    &mut self,
    path: PathT,
    pattern: PatternT,
) -> SchemaLinkerResult<()> {
    let path = path.as_ref();
    let pattern = pattern.as_ref();
    log::info!(
        "Adding schema source dir {:?} with pattern {:?}",
        path,
        pattern
    );

    let walker = globwalk::GlobWalkerBuilder::new(path, pattern)
        .file_type(globwalk::FileType::FILE)
        .build()
        .map_err(|e| {
            SchemaLinkerError::String(format!(
                "Failed to build glob walker for {:?} with pattern {:?}: {}",
                path, pattern, e
            ))
        })?;

    for file in walker {
        let file = file.map_err(|e| {
            SchemaLinkerError::String(format!(
                "Failed to walk schema source dir {:?}: {}",
                path, e
            ))
        })?;
        let file_path = file.path();
        log::trace!("Parsing schema file {}", file_path.display());

        let schema_str = std::fs::read_to_string(file_path).map_err(|e| {
            SchemaLinkerError::String(format!(
                "Failed to read schema file {}: {}",
                file_path.display(),
                e
            ))
        })?;

        let json_value: serde_json::Value = {
            profiling::scope!("serde_json::from_str");
            serde_json::from_str(&schema_str).map_err(|e| {
                SchemaLinkerError::String(format!(
                    "Failed to parse schema file {} as JSON: {}",
                    file_path.display(),
                    e
                ))
            })?
        };

        let json_objects = json_value.as_array().ok_or_else(|| {
            SchemaLinkerError::Str("Schema file must be an array of json objects")
        })?;

        let base_path = file_path.canonicalize().map_err(|e| {
            SchemaLinkerError::String(format!(
                "Failed to canonicalize schema file path {}: {}",
                file_path.display(),
                e
            ))
        })?;

        // The error-context prefix is the same for every definition in the
        // file, so build it once rather than per definition.
        let error_context = format!("[{}]", file_path.display());
        for json_object in json_objects {
            let named_type = super::json_schema::parse_json_schema_def(
                json_object,
                &error_context,
                &base_path,
            )?;
            self.add_named_type(named_type)?;
        }
    }

    Ok(())
}
/// Defines a record type in code and registers it with the linker.
///
/// `f` is called once with a fresh [`RecordTypeBuilder`]; the fields, aliases
/// and markup it leaves on the builder become the record definition named
/// `name` with UUID `type_uuid`.
///
/// # Errors
///
/// Propagates any error from constructing a field or the record itself, and
/// fails if the type name or one of its aliases is already in use.
pub fn register_record_type<F: Fn(&mut RecordTypeBuilder)>(
    &mut self,
    name: impl Into<String>,
    type_uuid: Uuid,
    f: F,
) -> SchemaLinkerResult<()> {
    let mut builder = RecordTypeBuilder::default();
    f(&mut builder);

    // Stops at the first field that fails to construct.
    let fields = builder
        .fields
        .into_iter()
        .map(|field| {
            SchemaDefRecordField::new(
                field.name,
                field.field_uuid,
                field.aliases,
                field.field_type,
                field.markup,
            )
        })
        .collect::<Result<Vec<_>, _>>()?;

    let name: String = name.into();
    let record = SchemaDefRecord::new(name, type_uuid, builder.aliases, fields, builder.markup)?;

    self.add_named_type(SchemaDefNamedType::Record(record))
}
/// Defines an enum type in code and registers it with the linker.
///
/// `f` is called once with a fresh [`EnumTypeBuilder`]; the symbols and
/// aliases it leaves on the builder become the enum definition named `name`
/// with UUID `type_uuid`. Symbols are sorted by symbol name before the enum
/// is constructed.
///
/// # Errors
///
/// Propagates any error from constructing a symbol or the enum itself, and
/// fails if the type name or one of its aliases is already in use.
pub fn register_enum_type<F: Fn(&mut EnumTypeBuilder)>(
    &mut self,
    name: impl Into<String>,
    type_uuid: Uuid,
    f: F,
) -> SchemaLinkerResult<()> {
    let mut builder = EnumTypeBuilder::default();
    f(&mut builder);

    // Stops at the first symbol that fails to construct.
    let mut symbols = builder
        .symbols
        .into_iter()
        .map(|symbol| SchemaDefEnumSymbol::new(symbol.name, symbol.symbol_uuid, symbol.aliases))
        .collect::<Result<Vec<_>, _>>()?;
    symbols.sort_by(|lhs, rhs| lhs.symbol_name.cmp(&rhs.symbol_name));

    let name: String = name.into();
    let schema_enum = SchemaDefEnum::new(name, type_uuid, builder.aliases, symbols)?;

    self.add_named_type(SchemaDefNamedType::Enum(schema_enum))
}
fn validate_schema(
schema_being_validated: &str,
schema: &SchemaDefType,
named_types: &HashMap<String, SchemaDefNamedType>,
validated_types: &mut HashSet<String>,
) -> Result<(), SchemaDefValidationError> {
match schema {
SchemaDefType::Nullable(def) => {
Self::validate_schema(schema_being_validated, &*def, named_types, validated_types)
}
SchemaDefType::Boolean => Ok(()),
SchemaDefType::I32 => Ok(()),
SchemaDefType::I64 => Ok(()),
SchemaDefType::U32 => Ok(()),
SchemaDefType::U64 => Ok(()),
SchemaDefType::F32 => Ok(()),
SchemaDefType::F64 => Ok(()),
SchemaDefType::Bytes => Ok(()),
SchemaDefType::String => Ok(()),
SchemaDefType::StaticArray(def) => Self::validate_schema(
schema_being_validated,
&*def.item_type,
named_types,
validated_types,
),
SchemaDefType::DynamicArray(def) => Self::validate_schema(
schema_being_validated,
&*def.item_type,
named_types,
validated_types,
),
SchemaDefType::Map(def) => {
match &*def.key_type {
SchemaDefType::Boolean
| SchemaDefType::I32
| SchemaDefType::I64
| SchemaDefType::U32
| SchemaDefType::U64
| SchemaDefType::String
| SchemaDefType::AssetRef(_) => {
Ok(())
}
SchemaDefType::Nullable(_) => Err(SchemaDefValidationError::InvalidMapKeyType(
schema_being_validated.to_string(),
"Nullable".to_string(),
)),
SchemaDefType::F32 => Err(SchemaDefValidationError::InvalidMapKeyType(
schema_being_validated.to_string(),
"F32".to_string(),
)),
SchemaDefType::F64 => Err(SchemaDefValidationError::InvalidMapKeyType(
schema_being_validated.to_string(),
"F64".to_string(),
)),
SchemaDefType::Bytes => Err(SchemaDefValidationError::InvalidMapKeyType(
schema_being_validated.to_string(),
"Bytes".to_string(),
)),
SchemaDefType::StaticArray(_) => {
Err(SchemaDefValidationError::InvalidMapKeyType(
schema_being_validated.to_string(),
"StaticArray".to_string(),
))
}
SchemaDefType::DynamicArray(_) => {
Err(SchemaDefValidationError::InvalidMapKeyType(
schema_being_validated.to_string(),
"DynamicArray".to_string(),
))
}
SchemaDefType::Map(_) => Err(SchemaDefValidationError::InvalidMapKeyType(
schema_being_validated.to_string(),
"Map".to_string(),
)),
SchemaDefType::NamedType(key_named_type) => {
match named_types.get(key_named_type) {
Some(SchemaDefNamedType::Record(_)) => {
Err(SchemaDefValidationError::InvalidMapKeyType(
schema_being_validated.to_string(),
key_named_type.to_string(),
))
}
Some(SchemaDefNamedType::Enum(_)) => {
Ok(())
}
None => {
Err(SchemaDefValidationError::ReferencedNamedTypeNotFound(
schema_being_validated.to_string(),
key_named_type.to_string(),
))
}
}
}
}?;
Self::validate_schema(
schema_being_validated,
&*def.value_type,
named_types,
validated_types,
)?;
Self::validate_schema(
schema_being_validated,
&*def.value_type,
named_types,
validated_types,
)?;
Ok(())
}
SchemaDefType::AssetRef(def) => {
match named_types.get(def) {
Some(SchemaDefNamedType::Record(_)) => {
Ok(())
}
Some(SchemaDefNamedType::Enum(_)) => {
Err(SchemaDefValidationError::InvalidAssetRefInnerType(
schema_being_validated.to_string(),
def.to_string(),
))
}
None => Err(SchemaDefValidationError::ReferencedNamedTypeNotFound(
schema_being_validated.to_string(),
def.to_string(),
)),
}
}
SchemaDefType::NamedType(type_name) => {
if validated_types.contains(type_name) {
return Ok(());
}
validated_types.insert(type_name.clone());
match named_types.get(type_name) {
Some(SchemaDefNamedType::Record(def)) => {
for field_def in def.fields() {
Self::validate_schema(
schema_being_validated,
&field_def.field_type,
named_types,
validated_types,
)?;
}
Ok(())
}
Some(SchemaDefNamedType::Enum(_)) => Ok(()),
None => Err(SchemaDefValidationError::ReferencedNamedTypeNotFound(
schema_being_validated.to_string(),
type_name.to_string(),
)),
}
}
}
}
/// Consumes the linker, validates every registered type and produces the
/// linked schema set.
///
/// Steps, in order:
/// 1. Apply the alias table to every registered definition.
/// 2. Validate every registered type.
/// 3. Compute a partial 128-bit hash for each type on its own.
/// 4. For each type, gather the names of all related types by repeatedly
///    expanding the set until it stops growing, then hash the partial
///    hashes of those types in sorted-name order to obtain the fingerprint.
/// 5. Build the final schema for each type and index it by fingerprint.
///
/// # Errors
///
/// Returns [`SchemaLinkerError::ValidationError`] if any type fails
/// validation.
///
/// # Panics
///
/// Panics if a related type name is not a registered type, or if a
/// partial hash or fingerprint lookup unexpectedly misses.
pub fn link_schemas(mut self) -> SchemaLinkerResult<LinkedSchemas> {
    for definition in self.types.values_mut() {
        definition.apply_type_aliases(&self.type_aliases);
    }

    let mut validated_types = Default::default();
    for (schema_name, definition) in &self.types {
        let root = SchemaDefType::NamedType(definition.type_name().to_string());
        Self::validate_schema(schema_name, &root, &self.types, &mut validated_types)?;
    }

    // Hash of each type in isolation, ignoring the types it refers to.
    let mut partial_hashes = HashMap::default();
    for (type_name, definition) in &self.types {
        let mut hasher = siphasher::sip128::SipHasher::default();
        definition.partial_hash(&mut hasher);
        partial_hashes.insert(type_name, hasher.finish128().as_u128());
    }

    let mut schemas_by_type_uuid: HashMap<Uuid, SchemaFingerprint> = Default::default();
    let mut schemas_by_name: HashMap<String, SchemaFingerprint> = Default::default();

    for (type_name, definition) in &self.types {
        let mut related_types = HashSet::default();
        related_types.insert(type_name.clone());

        // Expand the set of related names until an iteration adds nothing.
        loop {
            let snapshot: Vec<_> = related_types.iter().cloned().collect();
            for related_name in &snapshot {
                let related_definition = self.types.get(related_name).unwrap_or_else(|| {
                    panic!("Type named {} was referenced but undefined", related_name)
                });
                related_definition.collect_all_related_types(&mut related_types);
            }

            if snapshot.len() == related_types.len() {
                break;
            }
        }
        definition.collect_all_related_types(&mut related_types);

        // Sort so the fingerprint does not depend on set iteration order.
        let mut ordered_names: Vec<_> = related_types.into_iter().collect();
        ordered_names.sort();

        let mut hasher = siphasher::sip128::SipHasher::default();
        for related_name in &ordered_names {
            let partial_hash = partial_hashes.get(related_name).unwrap();
            partial_hash.hash(&mut hasher);
        }
        let fingerprint = SchemaFingerprint(hasher.finish128().as_u128());

        schemas_by_type_uuid.insert(definition.type_uuid(), fingerprint);
        schemas_by_name.insert(type_name.to_string(), fingerprint);
    }

    let mut schemas: HashMap<SchemaFingerprint, SchemaNamedType> = Default::default();
    for definition in self.types.values() {
        let fingerprint = schemas_by_name.get(definition.type_name()).unwrap();
        let schema = definition.to_schema(&self.types, &schemas_by_name);
        schemas.insert(*fingerprint, schema);
    }

    Ok(LinkedSchemas {
        schemas_by_type_uuid,
        schemas_by_name,
        schemas,
    })
}
}
/// Output of `SchemaLinker::link_schemas`: the linked schemas plus lookup
/// tables from type UUID and type name to schema fingerprint.
pub struct LinkedSchemas {
/// Fingerprint of each linked type, keyed by the type's UUID.
pub schemas_by_type_uuid: HashMap<Uuid, SchemaFingerprint>,
/// Fingerprint of each linked type, keyed by the type's name.
pub schemas_by_name: HashMap<String, SchemaFingerprint>,
/// The linked schemas, keyed by fingerprint.
pub schemas: HashMap<SchemaFingerprint, SchemaNamedType>,
}