data_modelling_core/export/
avro.rs

1//! AVRO schema exporter for generating AVRO schemas from data models.
2
3use super::{ExportError, ExportResult};
4use crate::models::{DataModel, Table};
5use serde_json::{Value, json};
6
/// Exporter for AVRO schema format.
///
/// This is a field-less unit struct: it carries no configuration or state.
/// `export` is called on an instance, while `export_table` and `export_model`
/// are associated functions that can be called without one.
pub struct AvroExporter;
9
10impl AvroExporter {
11    /// Export tables to AVRO schema format (SDK interface).
12    ///
13    /// # Arguments
14    ///
15    /// * `tables` - Slice of tables to export
16    ///
17    /// # Returns
18    ///
19    /// An `ExportResult` containing AVRO schema(s) as JSON.
20    /// If a single table is provided, returns a single schema object.
21    /// If multiple tables are provided, returns an array of schemas.
22    ///
23    /// # Example
24    ///
25    /// ```rust
26    /// use data_modelling_core::export::avro::AvroExporter;
27    /// use data_modelling_core::models::{Table, Column};
28    ///
29    /// let tables = vec![
30    ///     Table::new("User".to_string(), vec![Column::new("id".to_string(), "INT64".to_string())]),
31    /// ];
32    ///
33    /// let exporter = AvroExporter;
34    /// let result = exporter.export(&tables).unwrap();
35    /// assert_eq!(result.format, "avro");
36    /// ```
37    pub fn export(&self, tables: &[Table]) -> Result<ExportResult, ExportError> {
38        let schema = Self::export_model_from_tables(tables);
39        let content = serde_json::to_string_pretty(&schema)
40            .map_err(|e| ExportError::SerializationError(e.to_string()))?;
41
42        // Validate exported AVRO schema
43        {
44            use crate::validation::schema::validate_avro_internal;
45            validate_avro_internal(&content).map_err(|e| {
46                ExportError::ValidationError(format!("AVRO validation failed: {}", e))
47            })?;
48        }
49
50        Ok(ExportResult {
51            content,
52            format: "avro".to_string(),
53        })
54    }
55
56    fn export_model_from_tables(tables: &[Table]) -> serde_json::Value {
57        if tables.len() == 1 {
58            Self::export_table(&tables[0])
59        } else {
60            let schemas: Vec<serde_json::Value> = tables.iter().map(Self::export_table).collect();
61            serde_json::json!(schemas)
62        }
63    }
64
65    /// Export a table to AVRO schema format.
66    ///
67    /// # Arguments
68    ///
69    /// * `table` - The table to export
70    ///
71    /// # Returns
72    ///
73    /// A `serde_json::Value` representing the AVRO schema for the table.
74    ///
75    /// # Example
76    ///
77    /// ```rust
78    /// use data_modelling_core::export::avro::AvroExporter;
79    /// use data_modelling_core::models::{Table, Column};
80    ///
81    /// let table = Table::new(
82    ///     "User".to_string(),
83    ///     vec![Column::new("id".to_string(), "INT64".to_string())],
84    /// );
85    ///
86    /// let schema = AvroExporter::export_table(&table);
87    /// assert_eq!(schema["type"], "record");
88    /// assert_eq!(schema["name"], "User");
89    /// ```
90    pub fn export_table(table: &Table) -> Value {
91        let mut fields = Vec::new();
92
93        for column in &table.columns {
94            let mut field = serde_json::Map::new();
95            field.insert("name".to_string(), json!(column.name));
96
97            // Map data type to AVRO type
98            let avro_type = Self::map_data_type_to_avro(&column.data_type, column.nullable);
99            field.insert("type".to_string(), avro_type);
100
101            if !column.description.is_empty() {
102                field.insert("doc".to_string(), json!(column.description));
103            }
104
105            fields.push(json!(field));
106        }
107
108        let mut schema = serde_json::Map::new();
109        schema.insert("type".to_string(), json!("record"));
110        schema.insert("name".to_string(), json!(table.name));
111
112        // Add tags if present (AVRO doesn't have standard tags, but we can add them as metadata)
113        if !table.tags.is_empty() {
114            let tags_array: Vec<String> = table.tags.iter().map(|t| t.to_string()).collect();
115            schema.insert("tags".to_string(), json!(tags_array));
116        }
117        schema.insert("namespace".to_string(), json!("com.datamodel"));
118        schema.insert("fields".to_string(), json!(fields));
119
120        json!(schema)
121    }
122
123    /// Export a data model to AVRO schema format (legacy method for compatibility).
124    pub fn export_model(model: &DataModel, table_ids: Option<&[uuid::Uuid]>) -> Value {
125        let tables_to_export: Vec<&Table> = if let Some(ids) = table_ids {
126            model
127                .tables
128                .iter()
129                .filter(|t| ids.contains(&t.id))
130                .collect()
131        } else {
132            model.tables.iter().collect()
133        };
134
135        if tables_to_export.len() == 1 {
136            // Single table: return the schema directly
137            Self::export_table(tables_to_export[0])
138        } else {
139            // Multiple tables: return array of schemas
140            let schemas: Vec<Value> = tables_to_export
141                .iter()
142                .map(|t| Self::export_table(t))
143                .collect();
144            json!(schemas)
145        }
146    }
147
148    /// Map SQL/ODCL data types to AVRO types.
149    fn map_data_type_to_avro(data_type: &str, nullable: bool) -> Value {
150        let dt_lower = data_type.to_lowercase();
151
152        let avro_type = match dt_lower.as_str() {
153            "int" | "integer" | "smallint" | "tinyint" => json!("int"),
154            "bigint" => json!("long"),
155            "float" | "real" => json!("float"),
156            "double" | "decimal" | "numeric" => json!("double"),
157            "boolean" | "bool" => json!("boolean"),
158            "bytes" | "binary" | "varbinary" => json!("bytes"),
159            _ => {
160                // Default to string for VARCHAR, TEXT, CHAR, DATE, TIMESTAMP, etc.
161                json!("string")
162            }
163        };
164
165        if nullable {
166            json!(["null", avro_type])
167        } else {
168            avro_type
169        }
170    }
171}