1use crate::import::{ImportError, ImportResult, TableData};
23use crate::models::{Column, Table, Tag};
24use crate::validation::input::{validate_column_name, validate_data_type, validate_table_name};
25use anyhow::Result;
26use std::collections::HashMap;
27use tracing::{info, warn};
28
/// Importer that turns Protobuf schema text into SDK table definitions.
///
/// The type carries no state, so `ProtobufImporter::default()` and
/// `ProtobufImporter::new()` produce the same value. `Default` is derived
/// rather than hand-written, and `Debug`/`Clone` are derived so the importer
/// can be logged and stored inside other derivable types.
#[derive(Debug, Clone, Default)]
pub struct ProtobufImporter;
37
38impl ProtobufImporter {
39 pub fn new() -> Self {
49 Self
50 }
51
52 pub fn import(&self, proto_content: &str) -> Result<ImportResult, ImportError> {
78 match self.parse(proto_content) {
79 Ok((tables, errors)) => {
80 let mut sdk_tables = Vec::new();
81 for (idx, table) in tables.iter().enumerate() {
82 sdk_tables.push(TableData {
83 table_index: idx,
84 name: Some(table.name.clone()),
85 columns: table
86 .columns
87 .iter()
88 .map(|c| super::ColumnData {
89 name: c.name.clone(),
90 data_type: c.data_type.clone(),
91 physical_type: c.physical_type.clone(),
92 nullable: c.nullable,
93 primary_key: c.primary_key,
94 description: if c.description.is_empty() {
95 None
96 } else {
97 Some(c.description.clone())
98 },
99 quality: if c.quality.is_empty() {
100 None
101 } else {
102 Some(c.quality.clone())
103 },
104 relationships: c.relationships.clone(),
105 enum_values: if c.enum_values.is_empty() {
106 None
107 } else {
108 Some(c.enum_values.clone())
109 },
110 })
111 .collect(),
112 });
113 }
114 let sdk_errors: Vec<ImportError> = errors
115 .iter()
116 .map(|e| ImportError::ParseError(e.message.clone()))
117 .collect();
118 Ok(ImportResult {
119 tables: sdk_tables,
120 tables_requiring_name: Vec::new(),
121 errors: sdk_errors,
122 ai_suggestions: None,
123 })
124 }
125 Err(e) => Err(ImportError::ParseError(e.to_string())),
126 }
127 }
128
129 fn parse(&self, proto_content: &str) -> Result<(Vec<Table>, Vec<ParserError>)> {
142 let mut errors = Vec::new();
143 let mut tables = Vec::new();
144
145 let lines: Vec<&str> = proto_content.lines().collect();
147 let mut current_message: Option<Message> = None;
148 let mut messages = Vec::new();
149
150 for (_line_num, line) in lines.iter().enumerate() {
151 let trimmed = line.trim();
152
153 if trimmed.is_empty() || trimmed.starts_with("//") || trimmed.starts_with("/*") {
155 continue;
156 }
157
158 if trimmed.starts_with("message ") {
160 if let Some(msg) = current_message.take() {
162 messages.push(msg);
163 }
164
165 let msg_name = trimmed
167 .strip_prefix("message ")
168 .and_then(|s| {
169 let s = s.trim_end();
171 if let Some(stripped) = s.strip_suffix("{") {
172 Some(stripped)
173 } else if let Some(stripped) = s.strip_suffix(" {") {
174 Some(stripped)
175 } else {
176 s.split_whitespace().next()
177 }
178 })
179 .map(|s| s.trim())
180 .filter(|s| !s.is_empty())
181 .ok_or_else(|| anyhow::anyhow!("Invalid message syntax: {}", trimmed))?;
182
183 if let Err(e) = validate_table_name(msg_name) {
185 warn!("Message name validation warning for '{}': {}", msg_name, e);
186 }
187
188 current_message = Some(Message {
189 name: msg_name.to_string(),
190 fields: Vec::new(),
191 });
192 } else if trimmed == "}" || trimmed == "};" {
193 if let Some(msg) = current_message.take() {
195 messages.push(msg);
196 }
197 } else if trimmed.starts_with("enum ") {
198 continue;
200 } else if let Some(ref mut msg) = current_message {
201 if let Ok(field) = self.parse_field(trimmed, _line_num) {
203 msg.fields.push(field);
204 } else {
205 if !trimmed.is_empty() && !trimmed.starts_with("//") {
207 errors.push(ParserError {
208 error_type: "parse_error".to_string(),
209 field: Some(format!("line {}", _line_num + 1)),
210 message: format!("Failed to parse field: {}", trimmed),
211 });
212 }
213 }
214 }
215 }
216
217 if let Some(msg) = current_message {
219 messages.push(msg);
220 }
221
222 for message in &messages {
224 match self.message_to_table(message, &messages, &mut errors) {
225 Ok(table) => tables.push(table),
226 Err(e) => {
227 errors.push(ParserError {
228 error_type: "parse_error".to_string(),
229 field: Some(message.name.clone()),
230 message: format!("Failed to convert message to table: {}", e),
231 });
232 }
233 }
234 }
235
236 Ok((tables, errors))
237 }
238
239 fn parse_field(&self, line: &str, _line_num: usize) -> Result<ProtobufField> {
241 let line = line.split("//").next().unwrap_or(line).trim();
243
244 let parts: Vec<&str> = line.split_whitespace().collect();
246 if parts.len() < 3 {
247 return Err(anyhow::anyhow!("Invalid field syntax"));
248 }
249
250 let mut idx = 0;
251 let mut repeated = false;
252 let mut optional = false;
253
254 while idx < parts.len() {
256 match parts[idx] {
257 "repeated" => {
258 repeated = true;
259 idx += 1;
260 }
261 "optional" => {
262 optional = true;
263 idx += 1;
264 }
265 _ => break,
266 }
267 }
268
269 if idx >= parts.len() {
270 return Err(anyhow::anyhow!("Missing field type"));
271 }
272
273 let field_type = parts[idx].to_string();
274 idx += 1;
275
276 if idx >= parts.len() {
277 return Err(anyhow::anyhow!("Missing field name"));
278 }
279
280 let field_name = parts[idx]
281 .strip_suffix(";")
282 .unwrap_or(parts[idx])
283 .to_string();
284 idx += 1;
285
286 if let Err(e) = validate_column_name(&field_name) {
288 warn!("Field name validation warning for '{}': {}", field_name, e);
289 }
290 if let Err(e) = validate_data_type(&field_type) {
291 warn!("Field type validation warning for '{}': {}", field_type, e);
292 }
293
294 let _field_number = if idx < parts.len() {
296 parts[idx]
297 .strip_prefix("=")
298 .and_then(|s| s.strip_suffix(";"))
299 .and_then(|s| s.parse::<u32>().ok())
300 } else {
301 None
302 };
303
304 Ok(ProtobufField {
305 name: field_name,
306 field_type,
307 repeated,
308 nullable: optional || repeated, })
310 }
311
312 fn message_to_table(
314 &self,
315 message: &Message,
316 all_messages: &[Message],
317 _errors: &mut Vec<ParserError>,
318 ) -> Result<Table> {
319 let mut columns = Vec::new();
320
321 for field in &message.fields {
322 if let Some(nested_msg) = all_messages.iter().find(|m| m.name == field.field_type) {
324 for nested_field in &nested_msg.fields {
327 let nested_field_name = format!("{}.{}", field.name, nested_field.name);
328
329 if let Some(deep_nested_msg) = all_messages
331 .iter()
332 .find(|m| m.name == nested_field.field_type)
333 {
334 for deep_nested_field in &deep_nested_msg.fields {
336 let data_type = if deep_nested_field.repeated {
337 format!(
338 "ARRAY<{}>",
339 self.map_proto_type_to_sql(&deep_nested_field.field_type)
340 )
341 } else {
342 self.map_proto_type_to_sql(&deep_nested_field.field_type)
343 };
344
345 columns.push(Column {
346 name: format!("{}.{}", nested_field_name, deep_nested_field.name),
347 data_type,
348 physical_type: None,
349 nullable: nested_field.nullable || deep_nested_field.nullable,
350 primary_key: false,
351 secondary_key: false,
352 composite_key: None,
353 foreign_key: None,
354 constraints: Vec::new(),
355 description: String::new(),
356 quality: Vec::new(),
357 relationships: Vec::new(),
358 enum_values: Vec::new(),
359 errors: Vec::new(),
360 column_order: 0,
361 nested_data: None,
362 });
363 }
364 } else {
365 let data_type = if nested_field.repeated {
367 format!(
368 "ARRAY<{}>",
369 self.map_proto_type_to_sql(&nested_field.field_type)
370 )
371 } else {
372 self.map_proto_type_to_sql(&nested_field.field_type)
373 };
374
375 columns.push(Column {
376 name: nested_field_name,
377 data_type,
378 physical_type: None,
379 nullable: nested_field.nullable,
380 primary_key: false,
381 secondary_key: false,
382 composite_key: None,
383 foreign_key: None,
384 constraints: Vec::new(),
385 description: String::new(),
386 quality: Vec::new(),
387 relationships: Vec::new(),
388 enum_values: Vec::new(),
389 errors: Vec::new(),
390 column_order: 0,
391 nested_data: None,
392 });
393 }
394 }
395 } else {
396 let data_type = if field.repeated {
398 format!("ARRAY<{}>", self.map_proto_type_to_sql(&field.field_type))
399 } else {
400 self.map_proto_type_to_sql(&field.field_type)
401 };
402
403 columns.push(Column {
404 name: field.name.clone(),
405 data_type,
406 physical_type: None,
407 nullable: field.nullable,
408 primary_key: false,
409 secondary_key: false,
410 composite_key: None,
411 foreign_key: None,
412 constraints: Vec::new(),
413 description: String::new(),
414 quality: Vec::new(),
415 relationships: Vec::new(),
416 enum_values: Vec::new(),
417 errors: Vec::new(),
418 column_order: 0,
419 nested_data: None,
420 });
421 }
422 }
423
424 let tags: Vec<Tag> = Vec::new(); let mut odcl_metadata = HashMap::new();
431 odcl_metadata.insert(
432 "syntax".to_string(),
433 serde_json::Value::String("proto3".to_string()),
434 );
435
436 let table = Table {
437 id: crate::models::table::Table::generate_id(&message.name, None, None, None),
438 name: message.name.clone(),
439 columns,
440 database_type: None,
441 catalog_name: None,
442 schema_name: None,
443 medallion_layers: Vec::new(),
444 scd_pattern: None,
445 data_vault_classification: None,
446 modeling_level: None,
447 tags,
448 odcl_metadata,
449 owner: None,
450 sla: None,
451 contact_details: None,
452 infrastructure_type: None,
453 notes: None,
454 position: None,
455 yaml_file_path: None,
456 drawio_cell_id: None,
457 quality: Vec::new(),
458 errors: Vec::new(),
459 created_at: chrono::Utc::now(),
460 updated_at: chrono::Utc::now(),
461 };
462
463 info!(
464 "Parsed Protobuf message: {} with {} columns",
465 message.name,
466 table.columns.len()
467 );
468 Ok(table)
469 }
470
471 fn map_proto_type_to_sql(&self, proto_type: &str) -> String {
473 match proto_type {
474 "int32" | "int" => "INTEGER".to_string(),
475 "int64" | "long" => "BIGINT".to_string(),
476 "uint32" => "INTEGER".to_string(), "uint64" => "BIGINT".to_string(),
478 "sint32" => "INTEGER".to_string(), "sint64" => "BIGINT".to_string(),
480 "fixed32" => "INTEGER".to_string(), "fixed64" => "BIGINT".to_string(), "sfixed32" => "INTEGER".to_string(), "sfixed64" => "BIGINT".to_string(), "float" => "FLOAT".to_string(),
485 "double" => "DOUBLE".to_string(),
486 "bool" | "boolean" => "BOOLEAN".to_string(),
487 "bytes" => "BYTES".to_string(),
488 "string" => "STRING".to_string(),
489 _ => "STRING".to_string(), }
491 }
492}
493
494#[derive(Debug, Clone)]
496struct Message {
497 name: String,
498 fields: Vec<ProtobufField>,
499}
500
/// One field declaration parsed from inside a message body.
#[derive(Clone, Debug)]
struct ProtobufField {
    /// Field name as written in the schema.
    name: String,
    /// Type name as written in the schema (scalar or message name).
    field_type: String,
    /// Whether the declaration carried the `repeated` label.
    repeated: bool,
    /// Whether the resulting column may be null; the parser sets this for
    /// `optional` and `repeated` fields.
    nullable: bool,
}
509
/// A recoverable problem recorded while parsing; parsing continues after it.
#[derive(Clone, Debug)]
pub struct ParserError {
    /// Category label for the error (the parser in this file uses
    /// `"parse_error"`).
    pub error_type: String,
    /// Where the problem occurred, when known — the parser in this file
    /// stores either `"line N"` or a message name here.
    pub field: Option<String>,
    /// Human-readable description of the problem.
    pub message: String,
}