1use crate::import::{ImportError, ImportResult, TableData};
23use crate::models::{Column, Table, Tag};
24use crate::validation::input::{validate_column_name, validate_data_type, validate_table_name};
25use anyhow::Result;
26use std::collections::HashMap;
27use tracing::{info, warn};
28
/// Stateless importer that turns Protobuf (`.proto`) schema text into table definitions.
///
/// The type carries no data; every call to `import` works only on the text it is given.
/// `Debug` and `Clone` are derived so the importer can be logged and embedded in other
/// types that derive those traits.
#[derive(Debug, Clone)]
pub struct ProtobufImporter;
31
32impl Default for ProtobufImporter {
33 fn default() -> Self {
34 Self::new()
35 }
36}
37
38impl ProtobufImporter {
39 pub fn new() -> Self {
49 Self
50 }
51
52 pub fn import(&self, proto_content: &str) -> Result<ImportResult, ImportError> {
78 match self.parse(proto_content) {
79 Ok((tables, errors)) => {
80 let mut sdk_tables = Vec::new();
81 for (idx, table) in tables.iter().enumerate() {
82 sdk_tables.push(TableData {
83 table_index: idx,
84 name: Some(table.name.clone()),
85 columns: table
86 .columns
87 .iter()
88 .map(|c| super::ColumnData {
89 name: c.name.clone(),
90 data_type: c.data_type.clone(),
91 nullable: c.nullable,
92 primary_key: c.primary_key,
93 description: if c.description.is_empty() {
94 None
95 } else {
96 Some(c.description.clone())
97 },
98 quality: if c.quality.is_empty() {
99 None
100 } else {
101 Some(c.quality.clone())
102 },
103 ref_path: c.ref_path.clone(),
104 enum_values: if c.enum_values.is_empty() {
105 None
106 } else {
107 Some(c.enum_values.clone())
108 },
109 })
110 .collect(),
111 });
112 }
113 let sdk_errors: Vec<ImportError> = errors
114 .iter()
115 .map(|e| ImportError::ParseError(e.message.clone()))
116 .collect();
117 Ok(ImportResult {
118 tables: sdk_tables,
119 tables_requiring_name: Vec::new(),
120 errors: sdk_errors,
121 ai_suggestions: None,
122 })
123 }
124 Err(e) => Err(ImportError::ParseError(e.to_string())),
125 }
126 }
127
128 fn parse(&self, proto_content: &str) -> Result<(Vec<Table>, Vec<ParserError>)> {
141 let mut errors = Vec::new();
142 let mut tables = Vec::new();
143
144 let lines: Vec<&str> = proto_content.lines().collect();
146 let mut current_message: Option<Message> = None;
147 let mut messages = Vec::new();
148
149 for (_line_num, line) in lines.iter().enumerate() {
150 let trimmed = line.trim();
151
152 if trimmed.is_empty() || trimmed.starts_with("//") || trimmed.starts_with("/*") {
154 continue;
155 }
156
157 if trimmed.starts_with("message ") {
159 if let Some(msg) = current_message.take() {
161 messages.push(msg);
162 }
163
164 let msg_name = trimmed
166 .strip_prefix("message ")
167 .and_then(|s| {
168 let s = s.trim_end();
170 if let Some(stripped) = s.strip_suffix("{") {
171 Some(stripped)
172 } else if let Some(stripped) = s.strip_suffix(" {") {
173 Some(stripped)
174 } else {
175 s.split_whitespace().next()
176 }
177 })
178 .map(|s| s.trim())
179 .filter(|s| !s.is_empty())
180 .ok_or_else(|| anyhow::anyhow!("Invalid message syntax: {}", trimmed))?;
181
182 if let Err(e) = validate_table_name(msg_name) {
184 warn!("Message name validation warning for '{}': {}", msg_name, e);
185 }
186
187 current_message = Some(Message {
188 name: msg_name.to_string(),
189 fields: Vec::new(),
190 nested_messages: Vec::new(),
191 });
192 } else if trimmed == "}" || trimmed == "};" {
193 if let Some(msg) = current_message.take() {
195 messages.push(msg);
196 }
197 } else if trimmed.starts_with("enum ") {
198 continue;
200 } else if let Some(ref mut msg) = current_message {
201 if let Ok(field) = self.parse_field(trimmed, _line_num) {
203 msg.fields.push(field);
204 } else {
205 if !trimmed.is_empty() && !trimmed.starts_with("//") {
207 errors.push(ParserError {
208 error_type: "parse_error".to_string(),
209 field: Some(format!("line {}", _line_num + 1)),
210 message: format!("Failed to parse field: {}", trimmed),
211 });
212 }
213 }
214 }
215 }
216
217 if let Some(msg) = current_message {
219 messages.push(msg);
220 }
221
222 for message in &messages {
224 match self.message_to_table(message, &messages, &mut errors) {
225 Ok(table) => tables.push(table),
226 Err(e) => {
227 errors.push(ParserError {
228 error_type: "parse_error".to_string(),
229 field: Some(message.name.clone()),
230 message: format!("Failed to convert message to table: {}", e),
231 });
232 }
233 }
234 }
235
236 Ok((tables, errors))
237 }
238
239 fn parse_field(&self, line: &str, _line_num: usize) -> Result<ProtobufField> {
241 let line = line.split("//").next().unwrap_or(line).trim();
243
244 let parts: Vec<&str> = line.split_whitespace().collect();
246 if parts.len() < 3 {
247 return Err(anyhow::anyhow!("Invalid field syntax"));
248 }
249
250 let mut idx = 0;
251 let mut repeated = false;
252 let mut optional = false;
253
254 while idx < parts.len() {
256 match parts[idx] {
257 "repeated" => {
258 repeated = true;
259 idx += 1;
260 }
261 "optional" => {
262 optional = true;
263 idx += 1;
264 }
265 _ => break,
266 }
267 }
268
269 if idx >= parts.len() {
270 return Err(anyhow::anyhow!("Missing field type"));
271 }
272
273 let field_type = parts[idx].to_string();
274 idx += 1;
275
276 if idx >= parts.len() {
277 return Err(anyhow::anyhow!("Missing field name"));
278 }
279
280 let field_name = parts[idx]
281 .strip_suffix(";")
282 .unwrap_or(parts[idx])
283 .to_string();
284 idx += 1;
285
286 if let Err(e) = validate_column_name(&field_name) {
288 warn!("Field name validation warning for '{}': {}", field_name, e);
289 }
290 if let Err(e) = validate_data_type(&field_type) {
291 warn!("Field type validation warning for '{}': {}", field_type, e);
292 }
293
294 let _field_number = if idx < parts.len() {
296 parts[idx]
297 .strip_prefix("=")
298 .and_then(|s| s.strip_suffix(";"))
299 .and_then(|s| s.parse::<u32>().ok())
300 } else {
301 None
302 };
303
304 Ok(ProtobufField {
305 name: field_name,
306 field_type,
307 repeated,
308 nullable: optional || repeated, })
310 }
311
312 fn message_to_table(
314 &self,
315 message: &Message,
316 all_messages: &[Message],
317 _errors: &mut Vec<ParserError>,
318 ) -> Result<Table> {
319 let mut columns = Vec::new();
320
321 for field in &message.fields {
322 if let Some(nested_msg) = all_messages.iter().find(|m| m.name == field.field_type) {
324 for nested_field in &nested_msg.fields {
327 let nested_field_name = format!("{}.{}", field.name, nested_field.name);
328
329 if let Some(deep_nested_msg) = all_messages
331 .iter()
332 .find(|m| m.name == nested_field.field_type)
333 {
334 for deep_nested_field in &deep_nested_msg.fields {
336 let data_type = if deep_nested_field.repeated {
337 format!(
338 "ARRAY<{}>",
339 self.map_proto_type_to_sql(&deep_nested_field.field_type)
340 )
341 } else {
342 self.map_proto_type_to_sql(&deep_nested_field.field_type)
343 };
344
345 columns.push(Column {
346 name: format!("{}.{}", nested_field_name, deep_nested_field.name),
347 data_type,
348 nullable: nested_field.nullable || deep_nested_field.nullable,
349 primary_key: false,
350 secondary_key: false,
351 composite_key: None,
352 foreign_key: None,
353 constraints: Vec::new(),
354 description: String::new(),
355 quality: Vec::new(),
356 ref_path: None,
357 enum_values: Vec::new(),
358 errors: Vec::new(),
359 column_order: 0,
360 });
361 }
362 } else {
363 let data_type = if nested_field.repeated {
365 format!(
366 "ARRAY<{}>",
367 self.map_proto_type_to_sql(&nested_field.field_type)
368 )
369 } else {
370 self.map_proto_type_to_sql(&nested_field.field_type)
371 };
372
373 columns.push(Column {
374 name: nested_field_name,
375 data_type,
376 nullable: nested_field.nullable,
377 primary_key: false,
378 secondary_key: false,
379 composite_key: None,
380 foreign_key: None,
381 constraints: Vec::new(),
382 description: String::new(),
383 quality: Vec::new(),
384 ref_path: None,
385 enum_values: Vec::new(),
386 errors: Vec::new(),
387 column_order: 0,
388 });
389 }
390 }
391 } else {
392 let data_type = if field.repeated {
394 format!("ARRAY<{}>", self.map_proto_type_to_sql(&field.field_type))
395 } else {
396 self.map_proto_type_to_sql(&field.field_type)
397 };
398
399 columns.push(Column {
400 name: field.name.clone(),
401 data_type,
402 nullable: field.nullable,
403 primary_key: false,
404 secondary_key: false,
405 composite_key: None,
406 foreign_key: None,
407 constraints: Vec::new(),
408 description: String::new(),
409 quality: Vec::new(),
410 ref_path: None,
411 enum_values: Vec::new(),
412 errors: Vec::new(),
413 column_order: 0,
414 });
415 }
416 }
417
418 let tags: Vec<Tag> = Vec::new(); let mut odcl_metadata = HashMap::new();
425 odcl_metadata.insert(
426 "syntax".to_string(),
427 serde_json::Value::String("proto3".to_string()),
428 );
429
430 let table = Table {
431 id: crate::models::table::Table::generate_id(&message.name, None, None, None),
432 name: message.name.clone(),
433 columns,
434 database_type: None,
435 catalog_name: None,
436 schema_name: None,
437 medallion_layers: Vec::new(),
438 scd_pattern: None,
439 data_vault_classification: None,
440 modeling_level: None,
441 tags,
442 odcl_metadata,
443 owner: None,
444 sla: None,
445 contact_details: None,
446 infrastructure_type: None,
447 notes: None,
448 position: None,
449 yaml_file_path: None,
450 drawio_cell_id: None,
451 quality: Vec::new(),
452 errors: Vec::new(),
453 created_at: chrono::Utc::now(),
454 updated_at: chrono::Utc::now(),
455 };
456
457 info!(
458 "Parsed Protobuf message: {} with {} columns",
459 message.name,
460 table.columns.len()
461 );
462 Ok(table)
463 }
464
465 fn map_proto_type_to_sql(&self, proto_type: &str) -> String {
467 match proto_type {
468 "int32" | "int" => "INTEGER".to_string(),
469 "int64" | "long" => "BIGINT".to_string(),
470 "uint32" => "INTEGER".to_string(), "uint64" => "BIGINT".to_string(),
472 "sint32" => "INTEGER".to_string(), "sint64" => "BIGINT".to_string(),
474 "fixed32" => "INTEGER".to_string(), "fixed64" => "BIGINT".to_string(), "sfixed32" => "INTEGER".to_string(), "sfixed64" => "BIGINT".to_string(), "float" => "FLOAT".to_string(),
479 "double" => "DOUBLE".to_string(),
480 "bool" | "boolean" => "BOOLEAN".to_string(),
481 "bytes" => "BYTES".to_string(),
482 "string" => "STRING".to_string(),
483 _ => "STRING".to_string(), }
485 }
486}
487
/// A Protobuf `message` as collected by the line scanner, before conversion to a table.
#[derive(Debug, Clone)]
struct Message {
    /// Message name as written after the `message` keyword.
    name: String,
    /// Fields collected from the message body, in source order.
    fields: Vec<ProtobufField>,
    /// Never read in this file (hence the `dead_code` allowance); the parser creates
    /// every message with this vector empty.
    #[allow(dead_code)]
    nested_messages: Vec<Message>,
}
496
/// A single field declaration extracted from a message body.
#[derive(Debug, Clone)]
struct ProtobufField {
    /// Field name as written in the declaration.
    name: String,
    /// Declared type: a Protobuf scalar name or the name of another message.
    field_type: String,
    /// True when the field carries the `repeated` label; such fields become `ARRAY<...>`
    /// columns.
    repeated: bool,
    /// True when the field was declared `optional` or `repeated`.
    nullable: bool,
}
505
/// A non-fatal problem found while parsing Protobuf text.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be logged,
/// converted with `?`, or boxed as `dyn Error` like any other error type.
#[derive(Debug, Clone)]
pub struct ParserError {
    /// Machine-readable category, e.g. `"parse_error"`.
    pub error_type: String,
    /// Where the problem occurred: a `"line N"` marker or a message name, if known.
    pub field: Option<String>,
    /// Human-readable description of the problem.
    pub message: String,
}

impl std::fmt::Display for ParserError {
    /// Formats as `type (location): message`, or `type: message` without a location.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match &self.field {
            Some(location) => write!(f, "{} ({}): {}", self.error_type, location, self.message),
            None => write!(f, "{}: {}", self.error_type, self.message),
        }
    }
}

impl std::error::Error for ParserError {}