Skip to main content

switchback_avro/schema/
ir.rs

1//! Avro schema intermediate representation.
2
3use serde_json::Value;
4
5/// Parsed Avro schema node.
6#[derive(Clone, Debug, PartialEq)]
7pub enum AvroSchema {
8    /// Avro primitive type name or primitive-with-metadata object.
9    Primitive(AvroPrimitive),
10    /// Named type reference (FQCN-style string).
11    NamedRef(String),
12    /// Record definition.
13    Record(AvroRecord),
14    /// Enum definition.
15    Enum(AvroEnum),
16    /// Array type.
17    Array(AvroArray),
18    /// Map type.
19    Map(AvroMap),
20    /// Fixed type.
21    Fixed(AvroFixed),
22    /// Union of types.
23    Union(AvroUnion),
24}
25
/// The Avro primitive types recognised by the parser.
///
/// Each variant corresponds to the lowercase type name used in schema JSON
/// (`"null"`, `"boolean"`, `"int"`, ...).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AvroPrimitive {
    /// `"null"`
    Null,
    /// `"boolean"`
    Boolean,
    /// `"int"`
    Int,
    /// `"long"`
    Long,
    /// `"float"`
    Float,
    /// `"double"`
    Double,
    /// `"bytes"`
    Bytes,
    /// `"string"`
    String,
}
38
39/// Record field.
40#[derive(Clone, Debug, PartialEq)]
41pub struct AvroField {
42    pub name: String,
43    pub schema: AvroSchema,
44    pub default: Option<Value>,
45}
46
47/// Avro record type.
48#[derive(Clone, Debug, PartialEq)]
49pub struct AvroRecord {
50    pub name: String,
51    pub namespace: Option<String>,
52    pub fields: Vec<AvroField>,
53}
54
/// An Avro enum definition.
#[derive(Debug, Clone, PartialEq)]
pub struct AvroEnum {
    /// Enum name as written in the schema.
    pub name: String,
    /// Namespace of the enum, when one is given.
    pub namespace: Option<String>,
    /// Symbols of the enum, in declaration order.
    pub symbols: Vec<String>,
}
62
63/// Avro array type.
64#[derive(Clone, Debug, PartialEq)]
65pub struct AvroArray {
66    pub items: Box<AvroSchema>,
67}
68
69/// Avro map type.
70#[derive(Clone, Debug, PartialEq)]
71pub struct AvroMap {
72    pub values: Box<AvroSchema>,
73}
74
/// An Avro fixed definition.
#[derive(Debug, Clone, PartialEq)]
pub struct AvroFixed {
    /// Name of the fixed type as written in the schema.
    pub name: String,
    /// Namespace of the fixed type, when one is given.
    pub namespace: Option<String>,
    /// Value of the schema's `size` attribute.
    pub size: u32,
}
82
83/// Avro union type.
84#[derive(Clone, Debug, PartialEq)]
85pub struct AvroUnion {
86    pub variants: Vec<AvroSchema>,
87}
88
89impl AvroSchema {
90    /// Parse an Avro schema JSON value into IR.
91    pub fn from_value(value: &Value) -> Self {
92        match value {
93            Value::String(s) => primitive_or_named(s),
94            Value::Array(items) => Self::Union(AvroUnion {
95                variants: items.iter().map(Self::from_value).collect(),
96            }),
97            Value::Object(map) => object_schema(map),
98            _ => Self::Primitive(AvroPrimitive::Null),
99        }
100    }
101
102    /// True when the JSON value looks like an Avro schema document.
103    pub fn is_schema_value(value: &Value) -> bool {
104        match value {
105            Value::String(s) => is_primitive_name(s) || is_named_ref(s),
106            Value::Array(_) => true,
107            Value::Object(map) => map.contains_key("type") || map.contains_key("name"),
108            _ => false,
109        }
110    }
111}
112
113fn object_schema(map: &serde_json::Map<String, Value>) -> AvroSchema {
114    if map.get("type").is_none() && map.contains_key("fields") {
115        return AvroSchema::Record(parse_record(map));
116    }
117
118    let ty = map.get("type").and_then(|v| v.as_str()).unwrap_or_default();
119    match ty {
120        "record" => AvroSchema::Record(parse_record(map)),
121        "enum" => AvroSchema::Enum(AvroEnum {
122            name: string_field(map, "name"),
123            namespace: optional_string(map, "namespace"),
124            symbols: map
125                .get("symbols")
126                .and_then(|v| v.as_array())
127                .map(|syms| {
128                    syms.iter()
129                        .filter_map(|s| s.as_str().map(str::to_string))
130                        .collect()
131                })
132                .unwrap_or_default(),
133        }),
134        "array" => AvroSchema::Array(AvroArray {
135            items: Box::new(
136                map.get("items")
137                    .map(AvroSchema::from_value)
138                    .unwrap_or(AvroSchema::Primitive(AvroPrimitive::Null)),
139            ),
140        }),
141        "map" => AvroSchema::Map(AvroMap {
142            values: Box::new(
143                map.get("values")
144                    .map(AvroSchema::from_value)
145                    .unwrap_or(AvroSchema::Primitive(AvroPrimitive::Null)),
146            ),
147        }),
148        "fixed" => AvroSchema::Fixed(AvroFixed {
149            name: string_field(map, "name"),
150            namespace: optional_string(map, "namespace"),
151            size: map.get("size").and_then(|v| v.as_u64()).unwrap_or(0) as u32,
152        }),
153        other => {
154            if let Some(p) = parse_primitive(other) {
155                AvroSchema::Primitive(p)
156            } else {
157                AvroSchema::NamedRef(other.to_string())
158            }
159        }
160    }
161}
162
163fn parse_record(map: &serde_json::Map<String, Value>) -> AvroRecord {
164    AvroRecord {
165        name: string_field(map, "name"),
166        namespace: optional_string(map, "namespace"),
167        fields: map
168            .get("fields")
169            .and_then(|v| v.as_array())
170            .map(|fields| {
171                fields
172                    .iter()
173                    .filter_map(|field| {
174                        let obj = field.as_object()?;
175                        let name = obj.get("name").and_then(|v| v.as_str())?;
176                        let schema = obj
177                            .get("type")
178                            .map(AvroSchema::from_value)
179                            .unwrap_or(AvroSchema::Primitive(AvroPrimitive::Null));
180                        Some(AvroField {
181                            name: name.to_string(),
182                            schema,
183                            default: obj.get("default").cloned(),
184                        })
185                    })
186                    .collect()
187            })
188            .unwrap_or_default(),
189    }
190}
191
192fn primitive_or_named(s: &str) -> AvroSchema {
193    if let Some(p) = parse_primitive(s) {
194        AvroSchema::Primitive(p)
195    } else {
196        AvroSchema::NamedRef(s.to_string())
197    }
198}
199
200fn parse_primitive(name: &str) -> Option<AvroPrimitive> {
201    match name {
202        "null" => Some(AvroPrimitive::Null),
203        "boolean" => Some(AvroPrimitive::Boolean),
204        "int" => Some(AvroPrimitive::Int),
205        "long" => Some(AvroPrimitive::Long),
206        "float" => Some(AvroPrimitive::Float),
207        "double" => Some(AvroPrimitive::Double),
208        "bytes" => Some(AvroPrimitive::Bytes),
209        "string" => Some(AvroPrimitive::String),
210        _ => None,
211    }
212}
213
214fn is_primitive_name(s: &str) -> bool {
215    parse_primitive(s).is_some()
216}
217
/// Whether `s` is syntactically a valid Avro name or dotted full name.
///
/// The string is split on `.`; every component must start with an ASCII
/// letter or `_` and continue with ASCII letters, digits or `_` only.
/// The empty string, empty components (`"a..b"`, `"a."`, `".a"`) and strings
/// containing any other character (spaces, hyphens, ...) are rejected.
///
/// Earlier versions only inspected the first character, so arbitrary text
/// such as `"hello world"` was accepted as a named reference.
fn is_named_ref(s: &str) -> bool {
    // `"".split('.')` yields one empty component, which fails the first-char
    // test below, so no separate emptiness check is needed.
    s.split('.').all(|component| {
        let mut chars = component.chars();
        chars
            .next()
            .is_some_and(|c| c.is_ascii_alphabetic() || c == '_')
            && chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
    })
}
224
225fn string_field(map: &serde_json::Map<String, Value>, key: &str) -> String {
226    map.get(key)
227        .and_then(|v| v.as_str())
228        .unwrap_or_default()
229        .to_string()
230}
231
232fn optional_string(map: &serde_json::Map<String, Value>, key: &str) -> Option<String> {
233    map.get(key)
234        .and_then(|v| v.as_str())
235        .map(str::to_string)
236        .filter(|s| !s.is_empty())
237}