use crate::registry::SchemaRegistry;
use crate::types::{
EnumDefinition, PrimitiveType, SchemaDefinition, SchemaType, StructDefinition, StructField,
VariantData,
};
use serde_json::{json, Map, Value};
/// Renders the Avro schema for a single definition as pretty-printed JSON.
///
/// Returns the literal `"{}"` if serialization of the generated value fails.
pub fn generate(definition: &SchemaDefinition) -> String {
    match serde_json::to_string_pretty(&generate_value(definition)) {
        Ok(rendered) => rendered,
        Err(_) => String::from("{}"),
    }
}
/// Builds the Avro schema for a definition as a `serde_json::Value`.
///
/// When the generated schema is a JSON object and the definition carries a
/// description, the description is stored under the `"doc"` key. Schemas that
/// are not objects are returned untouched.
pub fn generate_value(definition: &SchemaDefinition) -> Value {
    let mut schema = generate_named_type(&definition.name, &definition.schema_type);
    if let (Value::Object(entries), Some(desc)) = (&mut schema, definition.description.as_ref()) {
        entries.insert("doc".to_string(), json!(desc));
    }
    schema
}
/// Renders every struct, enum and newtype definition in the registry as a
/// pretty-printed JSON array of Avro schemas.
///
/// Definitions are emitted in the order returned by
/// `SchemaRegistry::topological_sort`; if sorting fails, the registry's own
/// iteration order is used instead. Definitions of any other kind are skipped.
///
/// All three emitted kinds go through [`generate_value`], so a newtype's
/// description ends up in `"doc"` exactly as it does when the same definition
/// is rendered on its own.
///
/// Returns the literal `"[]"` if serialization fails.
pub fn generate_bundle(registry: &SchemaRegistry) -> String {
    let definitions: Vec<_> = match registry.topological_sort() {
        Ok(sorted) => sorted,
        // Best effort: fall back to unsorted output rather than failing.
        Err(_) => registry.definitions().map(|(_, def)| def).collect(),
    };

    let schemas: Vec<Value> = definitions
        .iter()
        .filter_map(|def| match &def.schema_type {
            SchemaType::Struct(_) | SchemaType::Enum(_) | SchemaType::Newtype(_) => {
                Some(generate_value(def))
            }
            _ => None,
        })
        .collect();

    serde_json::to_string_pretty(&schemas).unwrap_or_else(|_| "[]".to_string())
}
/// Generates a schema for a top-level type that should carry `name`.
///
/// Structs become records, enums go through `generate_avro_enum`, and a
/// newtype becomes a record with a single `"value"` field holding the inner
/// type. Every other kind falls back to the anonymous schema and ignores
/// `name`.
fn generate_named_type(name: &str, schema_type: &SchemaType) -> Value {
    match schema_type {
        SchemaType::Struct(record_def) => generate_record(name, record_def),
        SchemaType::Enum(enum_def) => generate_avro_enum(name, enum_def),
        SchemaType::Newtype(wrapper) => {
            let wrapped_field = json!({
                "name": "value",
                "type": generate_type_schema(&wrapper.inner_type)
            });
            json!({
                "type": "record",
                "name": name,
                "fields": vec![wrapped_field]
            })
        }
        other => generate_type_schema(other),
    }
}
fn generate_record(name: &str, def: &StructDefinition) -> Value {
let mut fields = Vec::new();
if def.is_tuple_struct {
for (i, field) in def.fields.values().enumerate() {
let mut field_schema = Map::new();
field_schema.insert("name".to_string(), json!(format!("field_{}", i)));
field_schema.insert("type".to_string(), generate_field_type(field));
if let Some(ref desc) = field.description {
field_schema.insert("doc".to_string(), json!(desc));
}
if let Some(ref default) = field.default {
field_schema.insert("default".to_string(), default.clone());
}
fields.push(Value::Object(field_schema));
}
} else {
for (field_name, field) in &def.fields {
let mut field_schema = Map::new();
field_schema.insert("name".to_string(), json!(to_avro_name(field_name)));
field_schema.insert("type".to_string(), generate_field_type(field));
if let Some(ref desc) = field.description {
field_schema.insert("doc".to_string(), json!(desc));
}
if let Some(ref default) = field.default {
field_schema.insert("default".to_string(), default.clone());
}
fields.push(Value::Object(field_schema));
}
}
json!({
"type": "record",
"name": name,
"fields": fields
})
}
fn generate_field_type(field: &StructField) -> Value {
let base_type = generate_type_schema(&field.schema_type);
if !field.required && !matches!(field.schema_type, SchemaType::Option(_)) {
json!(["null", base_type])
} else {
base_type
}
}
/// Generates the Avro schema for an enum definition.
///
/// A simple enum (as reported by `EnumDefinition::is_simple_enum`) becomes an
/// Avro `enum` named `name` whose symbols are the sanitised variant names.
///
/// Any other enum becomes a union (JSON array) with one record per variant,
/// named `{name}_{variant}`. The variant part is sanitised with
/// `to_avro_name`, as the simple-enum symbols are. Record fields by variant
/// kind:
///
/// * unit: no fields;
/// * newtype: a single `"value"` field;
/// * tuple: positional `field_N` fields;
/// * struct: the variant's own fields, with `"doc"` and `"default"` emitted
///   when present, as in `generate_record`.
fn generate_avro_enum(name: &str, def: &EnumDefinition) -> Value {
    if def.is_simple_enum() {
        let symbols: Vec<String> = def
            .variants
            .iter()
            .map(|v| to_avro_name(&v.name))
            .collect();
        return json!({
            "type": "enum",
            "name": name,
            "symbols": symbols
        });
    }

    let variant_types: Vec<Value> = def
        .variants
        .iter()
        .map(|variant| {
            // Sanitise the variant part so the record name stays a legal Avro name.
            let variant_name = format!("{}_{}", name, to_avro_name(&variant.name));

            let fields: Vec<Value> = match &variant.data {
                VariantData::Unit => Vec::new(),
                VariantData::Newtype(inner) => vec![json!({
                    "name": "value",
                    "type": generate_type_schema(inner)
                })],
                VariantData::Tuple(types) => types
                    .iter()
                    .enumerate()
                    .map(|(i, t)| {
                        json!({
                            "name": format!("field_{}", i),
                            "type": generate_type_schema(t)
                        })
                    })
                    .collect(),
                VariantData::Struct(fields) => fields
                    .iter()
                    .map(|(field_name, field)| {
                        let mut field_schema = Map::new();
                        field_schema.insert("name".to_string(), json!(to_avro_name(field_name)));
                        field_schema.insert("type".to_string(), generate_field_type(field));
                        if let Some(desc) = &field.description {
                            field_schema.insert("doc".to_string(), json!(desc));
                        }
                        if let Some(default) = &field.default {
                            field_schema.insert("default".to_string(), default.clone());
                        }
                        Value::Object(field_schema)
                    })
                    .collect(),
            };

            json!({
                "type": "record",
                "name": variant_name,
                "fields": fields
            })
        })
        .collect();

    Value::Array(variant_types)
}
fn generate_type_schema(schema_type: &SchemaType) -> Value {
match schema_type {
SchemaType::Primitive(prim) => generate_primitive_type(prim),
SchemaType::Option(inner) => {
let inner_schema = generate_type_schema(inner);
json!(["null", inner_schema])
}
SchemaType::Array(inner) => {
let items_schema = generate_type_schema(inner);
json!({
"type": "array",
"items": items_schema
})
}
SchemaType::Set(inner) => {
let items_schema = generate_type_schema(inner);
json!({
"type": "array",
"items": items_schema
})
}
SchemaType::Map(value_type) => {
let value_schema = generate_type_schema(value_type);
json!({
"type": "map",
"values": value_schema
})
}
SchemaType::Tuple(types) => {
let fields: Vec<Value> = types
.iter()
.enumerate()
.map(|(i, t)| {
json!({
"name": format!("field_{}", i),
"type": generate_type_schema(t)
})
})
.collect();
json!({
"type": "record",
"name": format!("Tuple{}", types.len()),
"fields": fields
})
}
SchemaType::Struct(def) => {
let fields: Vec<Value> = def
.fields
.iter()
.map(|(name, field)| {
json!({
"name": to_avro_name(name),
"type": generate_field_type(field)
})
})
.collect();
json!({
"type": "record",
"name": "AnonymousRecord",
"fields": fields
})
}
SchemaType::Enum(def) => {
if def.is_simple_enum() {
let symbols: Vec<String> =
def.variants.iter().map(|v| v.name.clone()).collect();
json!({
"type": "enum",
"name": "AnonymousEnum",
"symbols": symbols
})
} else {
json!("string")
}
}
SchemaType::Newtype(def) => generate_type_schema(&def.inner_type),
SchemaType::Reference(name) => json!(name),
SchemaType::Unit => json!("null"),
SchemaType::Any => {
json!("bytes")
}
}
}
fn generate_primitive_type(prim: &PrimitiveType) -> Value {
match prim {
PrimitiveType::Bool => json!("boolean"),
PrimitiveType::I8 | PrimitiveType::I16 | PrimitiveType::I32 => json!("int"),
PrimitiveType::I64 | PrimitiveType::Isize => json!("long"),
PrimitiveType::I128 => {
json!({
"type": "bytes",
"logicalType": "decimal",
"precision": 39,
"scale": 0
})
}
PrimitiveType::U8 | PrimitiveType::U16 => json!("int"),
PrimitiveType::U32 => json!("long"),
PrimitiveType::U64 | PrimitiveType::Usize => json!("long"),
PrimitiveType::U128 => {
json!({
"type": "bytes",
"logicalType": "decimal",
"precision": 39,
"scale": 0
})
}
PrimitiveType::F32 => json!("float"),
PrimitiveType::F64 => json!("double"),
PrimitiveType::Char => json!("string"),
PrimitiveType::String => json!("string"),
PrimitiveType::Bytes => json!("bytes"),
}
}
/// Converts an arbitrary identifier into a legal Avro name.
///
/// Avro names must match `[A-Za-z_][A-Za-z0-9_]*`, so:
///
/// * every character other than an ASCII letter, ASCII digit or `_` is
///   replaced by `_`, including non-ASCII letters and digits;
/// * a leading ASCII digit is prefixed with `_`;
/// * an empty input yields `"_field"`.
fn to_avro_name(name: &str) -> String {
    if name.is_empty() {
        return "_field".to_string();
    }

    // One extra byte for the `_` that may be prefixed to a leading digit.
    let mut result = String::with_capacity(name.len() + 1);
    if name.starts_with(|c: char| c.is_ascii_digit()) {
        result.push('_');
    }
    result.extend(name.chars().map(|c| {
        if c.is_ascii_alphanumeric() || c == '_' {
            c
        } else {
            '_'
        }
    }));
    result
}
// Unit tests for the Avro schema generator: primitive and container type
// mapping, record and enum generation, name sanitisation, and top-level
// definitions.
#[cfg(test)]
mod tests {
use super::*;
use crate::types::{EnumRepresentation, EnumVariant, NewtypeDefinition};
use indexmap::IndexMap;
// Spot-checks the primitive-to-Avro type name mapping.
#[test]
fn test_primitive_types() {
assert_eq!(generate_primitive_type(&PrimitiveType::Bool), json!("boolean"));
assert_eq!(generate_primitive_type(&PrimitiveType::I32), json!("int"));
assert_eq!(generate_primitive_type(&PrimitiveType::I64), json!("long"));
assert_eq!(generate_primitive_type(&PrimitiveType::F32), json!("float"));
assert_eq!(generate_primitive_type(&PrimitiveType::F64), json!("double"));
assert_eq!(generate_primitive_type(&PrimitiveType::String), json!("string"));
assert_eq!(generate_primitive_type(&PrimitiveType::Bytes), json!("bytes"));
}
// An Option becomes a union with "null" as the first branch.
#[test]
fn test_option_type() {
let opt = SchemaType::Option(Box::new(SchemaType::Primitive(PrimitiveType::String)));
let schema = generate_type_schema(&opt);
assert_eq!(schema, json!(["null", "string"]));
}
// An Array becomes an Avro "array" whose "items" is the element schema.
#[test]
fn test_array_type() {
let arr = SchemaType::Array(Box::new(SchemaType::Primitive(PrimitiveType::I32)));
let schema = generate_type_schema(&arr);
assert_eq!(schema["type"], "array");
assert_eq!(schema["items"], "int");
}
// A Map becomes an Avro "map" whose "values" is the value schema.
#[test]
fn test_map_type() {
let map = SchemaType::Map(Box::new(SchemaType::Primitive(PrimitiveType::String)));
let schema = generate_type_schema(&map);
assert_eq!(schema["type"], "map");
assert_eq!(schema["values"], "string");
}
// A Reference is emitted as the bare referenced name.
#[test]
fn test_reference_type() {
let ref_type = SchemaType::Reference("User".to_string());
assert_eq!(generate_type_schema(&ref_type), json!("User"));
}
// A two-field struct becomes a record with the given name and two fields.
#[test]
fn test_simple_record() {
let def = StructDefinition::new()
.with_field(
"name",
StructField::new(SchemaType::Primitive(PrimitiveType::String), "name"),
)
.with_field(
"age",
StructField::new(
SchemaType::Option(Box::new(SchemaType::Primitive(PrimitiveType::I32))),
"age",
),
);
let schema = generate_record("User", &def);
assert_eq!(schema["type"], "record");
assert_eq!(schema["name"], "User");
assert_eq!(schema["fields"].as_array().unwrap().len(), 2);
}
// An enum with only unit variants becomes an Avro "enum" listing its symbols.
#[test]
fn test_simple_enum() {
let def = EnumDefinition::new(EnumRepresentation::External)
.with_variant(EnumVariant::unit("Active"))
.with_variant(EnumVariant::unit("Inactive"));
let schema = generate_avro_enum("Status", &def);
assert_eq!(schema["type"], "enum");
assert_eq!(schema["name"], "Status");
assert!(schema["symbols"].as_array().unwrap().contains(&json!("Active")));
}
// An enum with a data-carrying variant becomes a union with one entry per variant.
#[test]
fn test_complex_enum() {
let mut fields = IndexMap::new();
fields.insert(
"reason".to_string(),
StructField::new(SchemaType::Primitive(PrimitiveType::String), "reason"),
);
let def = EnumDefinition::new(EnumRepresentation::External)
.with_variant(EnumVariant::unit("Active"))
.with_variant(EnumVariant::struct_variant("Suspended", fields));
let schema = generate_avro_enum("Status", &def);
assert!(schema.is_array());
assert_eq!(schema.as_array().unwrap().len(), 2);
}
// Name sanitisation: valid names pass through, a leading digit gets a "_"
// prefix, and dashes and spaces are replaced by "_".
#[test]
fn test_avro_name_conversion() {
assert_eq!(to_avro_name("normalName"), "normalName");
assert_eq!(to_avro_name("123invalid"), "_123invalid");
assert_eq!(to_avro_name("has-dash"), "has_dash");
assert_eq!(to_avro_name("has space"), "has_space");
}
// A top-level struct definition with a description yields a record that
// carries the description under "doc".
#[test]
fn test_full_definition() {
let def = SchemaDefinition::new(
"User",
SchemaType::Struct(
StructDefinition::new().with_field(
"id",
StructField::new(SchemaType::Primitive(PrimitiveType::U64), "id"),
),
),
)
.with_description("A user in the system");
let schema = generate_value(&def);
assert_eq!(schema["type"], "record");
assert_eq!(schema["name"], "User");
assert_eq!(schema["doc"], "A user in the system");
}
// A top-level newtype becomes a record whose first field is named "value".
#[test]
fn test_newtype() {
let newtype = NewtypeDefinition::new(SchemaType::Primitive(PrimitiveType::String));
let def = SchemaDefinition::new("Email", SchemaType::Newtype(newtype));
let schema = generate_value(&def);
assert_eq!(schema["type"], "record");
assert_eq!(schema["name"], "Email");
assert_eq!(schema["fields"][0]["name"], "value");
}
}