data_modelling_core/export/protobuf.rs
1//! Protobuf exporter for generating .proto files from data models.
2//!
3//! # Security
4//!
5//! All identifiers are sanitized to comply with Protobuf naming rules.
6//! Reserved words are prefixed with an underscore to avoid conflicts.
7
8use super::{ExportError, ExportResult};
9use crate::models::{DataModel, Table};
10
/// Protobuf keywords that this exporter refuses to emit verbatim as identifiers.
///
/// `ProtobufExporter::sanitize_identifier` compares candidate names against this
/// list case-insensitively and prefixes any match with an underscore.
const PROTOBUF_RESERVED: &[&str] = &[
    // File-level statements
    "syntax", "import", "weak", "public", "package", "option",
    // Top-level definitions
    "message", "enum", "service", "extend",
    // Extension / reserved range declarations
    "extensions", "reserved", "to", "max",
    // Field labels and composite field kinds
    "repeated", "optional", "required", "group", "oneof", "map",
    // Service definitions
    "returns", "rpc", "stream",
    // Boolean literals
    "true", "false",
];
39
/// Exporter for Protobuf format.
///
/// This is a stateless unit struct: it carries no configuration, so values are
/// free to copy, compare and default-construct.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct ProtobufExporter;
42
43impl ProtobufExporter {
44 /// Export tables to Protobuf format (SDK interface).
45 ///
46 /// # Arguments
47 ///
48 /// * `tables` - Slice of tables to export
49 ///
50 /// # Returns
51 ///
52 /// An `ExportResult` containing Protobuf `.proto` file content (proto3 by default).
53 ///
54 /// # Example
55 ///
56 /// ```rust
57 /// use data_modelling_core::export::protobuf::ProtobufExporter;
58 /// use data_modelling_core::models::{Table, Column};
59 ///
60 /// let tables = vec![
61 /// Table::new("User".to_string(), vec![Column::new("id".to_string(), "INT64".to_string())]),
62 /// ];
63 ///
64 /// let exporter = ProtobufExporter;
65 /// let result = exporter.export(&tables).unwrap();
66 /// assert_eq!(result.format, "protobuf");
67 /// assert!(result.content.contains("syntax = \"proto3\""));
68 /// ```
69 pub fn export(&self, tables: &[Table]) -> Result<ExportResult, ExportError> {
70 self.export_with_version(tables, "proto3")
71 }
72
73 /// Export tables to Protobuf format with specified version.
74 ///
75 /// # Arguments
76 ///
77 /// * `tables` - Slice of tables to export
78 /// * `version` - Protobuf syntax version ("proto2" or "proto3")
79 ///
80 /// # Returns
81 ///
82 /// An `ExportResult` containing Protobuf `.proto` file content.
83 pub fn export_with_version(
84 &self,
85 tables: &[Table],
86 version: &str,
87 ) -> Result<ExportResult, ExportError> {
88 if version != "proto2" && version != "proto3" {
89 return Err(ExportError::InvalidArgument(format!(
90 "Invalid protobuf version: {}. Must be 'proto2' or 'proto3'",
91 version
92 )));
93 }
94 let proto = Self::export_model_from_tables_with_version(tables, version);
95 Ok(ExportResult {
96 content: proto,
97 format: "protobuf".to_string(),
98 })
99 }
100
101 fn export_model_from_tables_with_version(tables: &[Table], version: &str) -> String {
102 let mut proto = String::new();
103 proto.push_str(&format!("syntax = \"{}\";\n\n", version));
104 proto.push_str("package com.datamodel;\n\n");
105 let mut field_number = 0u32;
106 for table in tables {
107 proto.push_str(&Self::export_table_with_version(
108 table,
109 &mut field_number,
110 version,
111 ));
112 proto.push('\n');
113 }
114 proto
115 }
116
117 /// Export tags as Protobuf comments.
118 fn export_tags_as_comments(tags: &[crate::models::Tag]) -> String {
119 if tags.is_empty() {
120 return String::new();
121 }
122 let tag_strings: Vec<String> = tags.iter().map(|t| t.to_string()).collect();
123 format!(" // tags: {}\n", tag_strings.join(", "))
124 }
125
126 /// Export a table to Protobuf message format.
127 ///
128 /// # Arguments
129 ///
130 /// * `table` - The table to export
131 /// * `field_number` - Mutable reference to field number counter (incremented for each field)
132 ///
133 /// # Returns
134 ///
135 /// A Protobuf message definition as a string.
136 ///
137 /// # Example
138 ///
139 /// ```rust
140 /// use data_modelling_core::export::protobuf::ProtobufExporter;
141 /// use data_modelling_core::models::{Table, Column};
142 ///
143 /// let table = Table::new(
144 /// "User".to_string(),
145 /// vec![Column::new("id".to_string(), "INT64".to_string())],
146 /// );
147 ///
148 /// let mut field_number = 0u32;
149 /// let proto = ProtobufExporter::export_table(&table, &mut field_number);
150 /// assert!(proto.contains("message User"));
151 /// ```
152 /// Export a table to Protobuf message format (proto3 by default).
153 pub fn export_table(table: &Table, field_number: &mut u32) -> String {
154 Self::export_table_with_version(table, field_number, "proto3")
155 }
156
157 /// Export a table to Protobuf message format with specified version.
158 pub fn export_table_with_version(
159 table: &Table,
160 field_number: &mut u32,
161 version: &str,
162 ) -> String {
163 let mut proto = String::new();
164
165 let message_name = Self::sanitize_identifier(&table.name);
166 proto.push_str(&format!("message {} {{\n", message_name));
167
168 if !table.tags.is_empty() {
169 proto.push_str(&Self::export_tags_as_comments(&table.tags));
170 }
171
172 for column in &table.columns {
173 *field_number += 1;
174
175 let proto_type = Self::map_data_type_to_protobuf(&column.data_type);
176 let is_repeated = column.data_type.to_lowercase().contains("array");
177 let repeated = if is_repeated { "repeated " } else { "" };
178
179 let field_name = Self::sanitize_identifier(&column.name);
180
181 // Handle field labels based on proto version
182 let field_label = if is_repeated {
183 "" // Repeated fields don't need optional/required
184 } else if version == "proto2" {
185 // proto2: all fields need a label
186 if column.nullable {
187 "optional "
188 } else {
189 "required "
190 }
191 } else {
192 // proto3: optional only for nullable fields
193 if column.nullable { "optional " } else { "" }
194 };
195
196 proto.push_str(&format!(
197 " {}{}{} {} = {};",
198 field_label, repeated, proto_type, field_name, field_number
199 ));
200
201 if !column.description.is_empty() {
202 let desc = column.description.replace('\n', " ").replace('\r', "");
203 proto.push_str(&format!(" // {}", desc));
204 }
205
206 proto.push('\n');
207 }
208
209 proto.push_str("}\n");
210 proto
211 }
212
213 /// Sanitize an identifier for use in Protobuf.
214 ///
215 /// - Replaces invalid characters with underscores
216 /// - Prefixes reserved words with underscore
217 /// - Ensures identifier starts with a letter or underscore
218 fn sanitize_identifier(name: &str) -> String {
219 // Replace dots (nested columns) and other invalid chars with underscores
220 let mut sanitized: String = name
221 .chars()
222 .map(|c| {
223 if c.is_alphanumeric() || c == '_' {
224 c
225 } else {
226 '_'
227 }
228 })
229 .collect();
230
231 // Ensure starts with letter or underscore
232 if let Some(first) = sanitized.chars().next()
233 && first.is_numeric()
234 {
235 sanitized = format!("_{}", sanitized);
236 }
237
238 // Handle reserved words
239 if PROTOBUF_RESERVED.contains(&sanitized.to_lowercase().as_str()) {
240 sanitized = format!("_{}", sanitized);
241 }
242
243 sanitized
244 }
245
246 /// Export a data model to Protobuf format (legacy method for compatibility, proto3).
247 pub fn export_model(model: &DataModel, table_ids: Option<&[uuid::Uuid]>) -> String {
248 let tables_to_export: Vec<&Table> = if let Some(ids) = table_ids {
249 model
250 .tables
251 .iter()
252 .filter(|t| ids.contains(&t.id))
253 .collect()
254 } else {
255 model.tables.iter().collect()
256 };
257
258 // Convert Vec<&Table> to &[Table] by cloning
259 let tables: Vec<Table> = tables_to_export.iter().map(|t| (*t).clone()).collect();
260 Self::export_model_from_tables_with_version(&tables, "proto3")
261 }
262
/// Map SQL/ODCL data types to Protobuf scalar types.
///
/// Matching is case-insensitive and ignores surrounding whitespace and any
/// parenthesised length/precision suffix, so `BIGINT(20)`, `DECIMAL(10, 2)`
/// and `timestamp(6)` map the same way as `bigint`, `decimal` and `timestamp`.
/// Anything unrecognised (VARCHAR, TEXT, CHAR, ...) falls back to `string`.
///
/// Note: For timestamp types, this returns basic proto types. If you need
/// google.protobuf.Timestamp or wrapper types, consider using the wrapper
/// type export option (future enhancement).
fn map_data_type_to_protobuf(data_type: &str) -> String {
    let normalized = data_type.trim().to_lowercase();

    // Keep only the base type name: everything before the first `(`.
    let base = normalized
        .split('(')
        .next()
        .unwrap_or(normalized.as_str())
        .trim_end();

    let proto_type = match base {
        "int" | "integer" | "smallint" | "tinyint" | "int32" => "int32",
        "bigint" | "int64" | "long" => "int64",
        "float" | "real" => "float",
        "double" | "decimal" | "numeric" => "double",
        "boolean" | "bool" => "bool",
        "bytes" | "binary" | "varbinary" => "bytes",
        // Temporal types - use int64 for timestamps (epoch millis) or string
        "timestamp" | "datetime" => "int64",
        "date" | "time" => "string",
        "uuid" => "string",
        // Default to string for VARCHAR, TEXT, CHAR, etc.
        _ => "string",
    };

    proto_type.to_string()
}
288}