data_modelling_core/export/
protobuf.rs

1//! Protobuf exporter for generating .proto files from data models.
2//!
3//! # Security
4//!
5//! All identifiers are sanitized to comply with Protobuf naming rules.
6//! Reserved words are prefixed with an underscore to avoid conflicts.
7
8use super::{ExportError, ExportResult};
9use crate::models::{DataModel, Table};
10
/// Protobuf language keywords that must not be emitted verbatim as identifiers.
///
/// Every entry is lower case; the sanitizer lower-cases a candidate identifier
/// before looking it up here.
const PROTOBUF_RESERVED: &[&str] = &[
    // File-level statements
    "syntax", "import", "weak", "public", "package", "option",
    // Definition keywords and range syntax
    "message", "enum", "service", "extend", "extensions", "reserved", "to", "max",
    // Field labels and composite field kinds
    "repeated", "optional", "required", "group", "oneof", "map",
    // Service definitions
    "returns", "rpc", "stream",
    // Boolean literals
    "true", "false",
];
39
/// Stateless exporter that renders data-model tables as Protobuf `.proto` text.
pub struct ProtobufExporter;
42
43impl ProtobufExporter {
44    /// Export tables to Protobuf format (SDK interface).
45    ///
46    /// # Arguments
47    ///
48    /// * `tables` - Slice of tables to export
49    ///
50    /// # Returns
51    ///
52    /// An `ExportResult` containing Protobuf `.proto` file content (proto3 by default).
53    ///
54    /// # Example
55    ///
56    /// ```rust
57    /// use data_modelling_core::export::protobuf::ProtobufExporter;
58    /// use data_modelling_core::models::{Table, Column};
59    ///
60    /// let tables = vec![
61    ///     Table::new("User".to_string(), vec![Column::new("id".to_string(), "INT64".to_string())]),
62    /// ];
63    ///
64    /// let exporter = ProtobufExporter;
65    /// let result = exporter.export(&tables).unwrap();
66    /// assert_eq!(result.format, "protobuf");
67    /// assert!(result.content.contains("syntax = \"proto3\""));
68    /// ```
69    pub fn export(&self, tables: &[Table]) -> Result<ExportResult, ExportError> {
70        self.export_with_version(tables, "proto3")
71    }
72
73    /// Export tables to Protobuf format with specified version.
74    ///
75    /// # Arguments
76    ///
77    /// * `tables` - Slice of tables to export
78    /// * `version` - Protobuf syntax version ("proto2" or "proto3")
79    ///
80    /// # Returns
81    ///
82    /// An `ExportResult` containing Protobuf `.proto` file content.
83    pub fn export_with_version(
84        &self,
85        tables: &[Table],
86        version: &str,
87    ) -> Result<ExportResult, ExportError> {
88        if version != "proto2" && version != "proto3" {
89            return Err(ExportError::InvalidArgument(format!(
90                "Invalid protobuf version: {}. Must be 'proto2' or 'proto3'",
91                version
92            )));
93        }
94        let proto = Self::export_model_from_tables_with_version(tables, version);
95        Ok(ExportResult {
96            content: proto,
97            format: "protobuf".to_string(),
98        })
99    }
100
101    fn export_model_from_tables_with_version(tables: &[Table], version: &str) -> String {
102        let mut proto = String::new();
103        proto.push_str(&format!("syntax = \"{}\";\n\n", version));
104        proto.push_str("package com.datamodel;\n\n");
105        let mut field_number = 0u32;
106        for table in tables {
107            proto.push_str(&Self::export_table_with_version(
108                table,
109                &mut field_number,
110                version,
111            ));
112            proto.push('\n');
113        }
114        proto
115    }
116
117    /// Export tags as Protobuf comments.
118    fn export_tags_as_comments(tags: &[crate::models::Tag]) -> String {
119        if tags.is_empty() {
120            return String::new();
121        }
122        let tag_strings: Vec<String> = tags.iter().map(|t| t.to_string()).collect();
123        format!("  // tags: {}\n", tag_strings.join(", "))
124    }
125
126    /// Export a table to Protobuf message format.
127    ///
128    /// # Arguments
129    ///
130    /// * `table` - The table to export
131    /// * `field_number` - Mutable reference to field number counter (incremented for each field)
132    ///
133    /// # Returns
134    ///
135    /// A Protobuf message definition as a string.
136    ///
137    /// # Example
138    ///
139    /// ```rust
140    /// use data_modelling_core::export::protobuf::ProtobufExporter;
141    /// use data_modelling_core::models::{Table, Column};
142    ///
143    /// let table = Table::new(
144    ///     "User".to_string(),
145    ///     vec![Column::new("id".to_string(), "INT64".to_string())],
146    /// );
147    ///
148    /// let mut field_number = 0u32;
149    /// let proto = ProtobufExporter::export_table(&table, &mut field_number);
150    /// assert!(proto.contains("message User"));
151    /// ```
152    /// Export a table to Protobuf message format (proto3 by default).
153    pub fn export_table(table: &Table, field_number: &mut u32) -> String {
154        Self::export_table_with_version(table, field_number, "proto3")
155    }
156
157    /// Export a table to Protobuf message format with specified version.
158    pub fn export_table_with_version(
159        table: &Table,
160        field_number: &mut u32,
161        version: &str,
162    ) -> String {
163        let mut proto = String::new();
164
165        let message_name = Self::sanitize_identifier(&table.name);
166        proto.push_str(&format!("message {} {{\n", message_name));
167
168        if !table.tags.is_empty() {
169            proto.push_str(&Self::export_tags_as_comments(&table.tags));
170        }
171
172        for column in &table.columns {
173            *field_number += 1;
174
175            let proto_type = Self::map_data_type_to_protobuf(&column.data_type);
176            let is_repeated = column.data_type.to_lowercase().contains("array");
177            let repeated = if is_repeated { "repeated " } else { "" };
178
179            let field_name = Self::sanitize_identifier(&column.name);
180
181            // Handle field labels based on proto version
182            let field_label = if is_repeated {
183                "" // Repeated fields don't need optional/required
184            } else if version == "proto2" {
185                // proto2: all fields need a label
186                if column.nullable {
187                    "optional "
188                } else {
189                    "required "
190                }
191            } else {
192                // proto3: optional only for nullable fields
193                if column.nullable { "optional " } else { "" }
194            };
195
196            proto.push_str(&format!(
197                "  {}{}{} {} = {};",
198                field_label, repeated, proto_type, field_name, field_number
199            ));
200
201            if !column.description.is_empty() {
202                let desc = column.description.replace('\n', " ").replace('\r', "");
203                proto.push_str(&format!(" // {}", desc));
204            }
205
206            proto.push('\n');
207        }
208
209        proto.push_str("}\n");
210        proto
211    }
212
213    /// Sanitize an identifier for use in Protobuf.
214    ///
215    /// - Replaces invalid characters with underscores
216    /// - Prefixes reserved words with underscore
217    /// - Ensures identifier starts with a letter or underscore
218    fn sanitize_identifier(name: &str) -> String {
219        // Replace dots (nested columns) and other invalid chars with underscores
220        let mut sanitized: String = name
221            .chars()
222            .map(|c| {
223                if c.is_alphanumeric() || c == '_' {
224                    c
225                } else {
226                    '_'
227                }
228            })
229            .collect();
230
231        // Ensure starts with letter or underscore
232        if let Some(first) = sanitized.chars().next()
233            && first.is_numeric()
234        {
235            sanitized = format!("_{}", sanitized);
236        }
237
238        // Handle reserved words
239        if PROTOBUF_RESERVED.contains(&sanitized.to_lowercase().as_str()) {
240            sanitized = format!("_{}", sanitized);
241        }
242
243        sanitized
244    }
245
246    /// Export a data model to Protobuf format (legacy method for compatibility, proto3).
247    pub fn export_model(model: &DataModel, table_ids: Option<&[uuid::Uuid]>) -> String {
248        let tables_to_export: Vec<&Table> = if let Some(ids) = table_ids {
249            model
250                .tables
251                .iter()
252                .filter(|t| ids.contains(&t.id))
253                .collect()
254        } else {
255            model.tables.iter().collect()
256        };
257
258        // Convert Vec<&Table> to &[Table] by cloning
259        let tables: Vec<Table> = tables_to_export.iter().map(|t| (*t).clone()).collect();
260        Self::export_model_from_tables_with_version(&tables, "proto3")
261    }
262
263    /// Map SQL/ODCL data types to Protobuf types.
264    ///
265    /// Note: For timestamp types, this returns basic proto types. If you need
266    /// google.protobuf.Timestamp or wrapper types, consider using the wrapper
267    /// type export option (future enhancement).
268    fn map_data_type_to_protobuf(data_type: &str) -> String {
269        let dt_lower = data_type.to_lowercase();
270
271        match dt_lower.as_str() {
272            "int" | "integer" | "smallint" | "tinyint" | "int32" => "int32".to_string(),
273            "bigint" | "int64" | "long" => "int64".to_string(),
274            "float" | "real" => "float".to_string(),
275            "double" | "decimal" | "numeric" => "double".to_string(),
276            "boolean" | "bool" => "bool".to_string(),
277            "bytes" | "binary" | "varbinary" => "bytes".to_string(),
278            // Temporal types - use int64 for timestamps (epoch millis) or string
279            "timestamp" | "datetime" => "int64".to_string(),
280            "date" | "time" => "string".to_string(),
281            "uuid" => "string".to_string(),
282            _ => {
283                // Default to string for VARCHAR, TEXT, CHAR, etc.
284                "string".to_string()
285            }
286        }
287    }
288}