1use crate::import::{ImportError, ImportResult, TableData};
23use crate::models::{Column, Table, Tag};
24use crate::validation::input::{validate_column_name, validate_data_type, validate_table_name};
25use anyhow::Result;
26use std::collections::HashMap;
27use tracing::{info, warn};
28
/// Importer that turns Protocol Buffers message definitions into table models.
///
/// The type is a stateless unit struct; all work happens in its methods.
pub struct ProtobufImporter;
31
32impl Default for ProtobufImporter {
33 fn default() -> Self {
34 Self::new()
35 }
36}
37
38impl ProtobufImporter {
39 pub fn new() -> Self {
49 Self
50 }
51
52 pub fn import(&self, proto_content: &str) -> Result<ImportResult, ImportError> {
78 match self.parse(proto_content) {
79 Ok((tables, errors)) => {
80 let mut sdk_tables = Vec::new();
81 for (idx, table) in tables.iter().enumerate() {
82 sdk_tables.push(TableData {
83 table_index: idx,
84 name: Some(table.name.clone()),
85 columns: table
86 .columns
87 .iter()
88 .map(|c| super::ColumnData {
89 name: c.name.clone(),
90 data_type: c.data_type.clone(),
91 nullable: c.nullable,
92 primary_key: c.primary_key,
93 description: if c.description.is_empty() {
94 None
95 } else {
96 Some(c.description.clone())
97 },
98 quality: if c.quality.is_empty() {
99 None
100 } else {
101 Some(c.quality.clone())
102 },
103 ref_path: c.ref_path.clone(),
104 })
105 .collect(),
106 });
107 }
108 let sdk_errors: Vec<ImportError> = errors
109 .iter()
110 .map(|e| ImportError::ParseError(e.message.clone()))
111 .collect();
112 Ok(ImportResult {
113 tables: sdk_tables,
114 tables_requiring_name: Vec::new(),
115 errors: sdk_errors,
116 ai_suggestions: None,
117 })
118 }
119 Err(e) => Err(ImportError::ParseError(e.to_string())),
120 }
121 }
122
123 fn parse(&self, proto_content: &str) -> Result<(Vec<Table>, Vec<ParserError>)> {
136 let mut errors = Vec::new();
137 let mut tables = Vec::new();
138
139 let lines: Vec<&str> = proto_content.lines().collect();
141 let mut current_message: Option<Message> = None;
142 let mut messages = Vec::new();
143
144 for (_line_num, line) in lines.iter().enumerate() {
145 let trimmed = line.trim();
146
147 if trimmed.is_empty() || trimmed.starts_with("//") || trimmed.starts_with("/*") {
149 continue;
150 }
151
152 if trimmed.starts_with("message ") {
154 if let Some(msg) = current_message.take() {
156 messages.push(msg);
157 }
158
159 let msg_name = trimmed
161 .strip_prefix("message ")
162 .and_then(|s| {
163 let s = s.trim_end();
165 if let Some(stripped) = s.strip_suffix("{") {
166 Some(stripped)
167 } else if let Some(stripped) = s.strip_suffix(" {") {
168 Some(stripped)
169 } else {
170 s.split_whitespace().next()
171 }
172 })
173 .map(|s| s.trim())
174 .filter(|s| !s.is_empty())
175 .ok_or_else(|| anyhow::anyhow!("Invalid message syntax: {}", trimmed))?;
176
177 if let Err(e) = validate_table_name(msg_name) {
179 warn!("Message name validation warning for '{}': {}", msg_name, e);
180 }
181
182 current_message = Some(Message {
183 name: msg_name.to_string(),
184 fields: Vec::new(),
185 nested_messages: Vec::new(),
186 });
187 } else if trimmed == "}" || trimmed == "};" {
188 if let Some(msg) = current_message.take() {
190 messages.push(msg);
191 }
192 } else if trimmed.starts_with("enum ") {
193 continue;
195 } else if let Some(ref mut msg) = current_message {
196 if let Ok(field) = self.parse_field(trimmed, _line_num) {
198 msg.fields.push(field);
199 } else {
200 if !trimmed.is_empty() && !trimmed.starts_with("//") {
202 errors.push(ParserError {
203 error_type: "parse_error".to_string(),
204 field: Some(format!("line {}", _line_num + 1)),
205 message: format!("Failed to parse field: {}", trimmed),
206 });
207 }
208 }
209 }
210 }
211
212 if let Some(msg) = current_message {
214 messages.push(msg);
215 }
216
217 for message in &messages {
219 match self.message_to_table(message, &messages, &mut errors) {
220 Ok(table) => tables.push(table),
221 Err(e) => {
222 errors.push(ParserError {
223 error_type: "parse_error".to_string(),
224 field: Some(message.name.clone()),
225 message: format!("Failed to convert message to table: {}", e),
226 });
227 }
228 }
229 }
230
231 Ok((tables, errors))
232 }
233
234 fn parse_field(&self, line: &str, _line_num: usize) -> Result<ProtobufField> {
236 let line = line.split("//").next().unwrap_or(line).trim();
238
239 let parts: Vec<&str> = line.split_whitespace().collect();
241 if parts.len() < 3 {
242 return Err(anyhow::anyhow!("Invalid field syntax"));
243 }
244
245 let mut idx = 0;
246 let mut repeated = false;
247 let mut optional = false;
248
249 while idx < parts.len() {
251 match parts[idx] {
252 "repeated" => {
253 repeated = true;
254 idx += 1;
255 }
256 "optional" => {
257 optional = true;
258 idx += 1;
259 }
260 _ => break,
261 }
262 }
263
264 if idx >= parts.len() {
265 return Err(anyhow::anyhow!("Missing field type"));
266 }
267
268 let field_type = parts[idx].to_string();
269 idx += 1;
270
271 if idx >= parts.len() {
272 return Err(anyhow::anyhow!("Missing field name"));
273 }
274
275 let field_name = parts[idx]
276 .strip_suffix(";")
277 .unwrap_or(parts[idx])
278 .to_string();
279 idx += 1;
280
281 if let Err(e) = validate_column_name(&field_name) {
283 warn!("Field name validation warning for '{}': {}", field_name, e);
284 }
285 if let Err(e) = validate_data_type(&field_type) {
286 warn!("Field type validation warning for '{}': {}", field_type, e);
287 }
288
289 let _field_number = if idx < parts.len() {
291 parts[idx]
292 .strip_prefix("=")
293 .and_then(|s| s.strip_suffix(";"))
294 .and_then(|s| s.parse::<u32>().ok())
295 } else {
296 None
297 };
298
299 Ok(ProtobufField {
300 name: field_name,
301 field_type,
302 repeated,
303 nullable: optional || repeated, })
305 }
306
307 fn message_to_table(
309 &self,
310 message: &Message,
311 all_messages: &[Message],
312 _errors: &mut Vec<ParserError>,
313 ) -> Result<Table> {
314 let mut columns = Vec::new();
315
316 for field in &message.fields {
317 if let Some(nested_msg) = all_messages.iter().find(|m| m.name == field.field_type) {
319 for nested_field in &nested_msg.fields {
322 let nested_field_name = format!("{}.{}", field.name, nested_field.name);
323
324 if let Some(deep_nested_msg) = all_messages
326 .iter()
327 .find(|m| m.name == nested_field.field_type)
328 {
329 for deep_nested_field in &deep_nested_msg.fields {
331 let data_type = if deep_nested_field.repeated {
332 format!(
333 "ARRAY<{}>",
334 self.map_proto_type_to_sql(&deep_nested_field.field_type)
335 )
336 } else {
337 self.map_proto_type_to_sql(&deep_nested_field.field_type)
338 };
339
340 columns.push(Column {
341 name: format!("{}.{}", nested_field_name, deep_nested_field.name),
342 data_type,
343 nullable: nested_field.nullable || deep_nested_field.nullable,
344 primary_key: false,
345 secondary_key: false,
346 composite_key: None,
347 foreign_key: None,
348 constraints: Vec::new(),
349 description: String::new(),
350 quality: Vec::new(),
351 ref_path: None,
352 enum_values: Vec::new(),
353 errors: Vec::new(),
354 column_order: 0,
355 });
356 }
357 } else {
358 let data_type = if nested_field.repeated {
360 format!(
361 "ARRAY<{}>",
362 self.map_proto_type_to_sql(&nested_field.field_type)
363 )
364 } else {
365 self.map_proto_type_to_sql(&nested_field.field_type)
366 };
367
368 columns.push(Column {
369 name: nested_field_name,
370 data_type,
371 nullable: nested_field.nullable,
372 primary_key: false,
373 secondary_key: false,
374 composite_key: None,
375 foreign_key: None,
376 constraints: Vec::new(),
377 description: String::new(),
378 quality: Vec::new(),
379 ref_path: None,
380 enum_values: Vec::new(),
381 errors: Vec::new(),
382 column_order: 0,
383 });
384 }
385 }
386 } else {
387 let data_type = if field.repeated {
389 format!("ARRAY<{}>", self.map_proto_type_to_sql(&field.field_type))
390 } else {
391 self.map_proto_type_to_sql(&field.field_type)
392 };
393
394 columns.push(Column {
395 name: field.name.clone(),
396 data_type,
397 nullable: field.nullable,
398 primary_key: false,
399 secondary_key: false,
400 composite_key: None,
401 foreign_key: None,
402 constraints: Vec::new(),
403 description: String::new(),
404 quality: Vec::new(),
405 ref_path: None,
406 enum_values: Vec::new(),
407 errors: Vec::new(),
408 column_order: 0,
409 });
410 }
411 }
412
413 let tags: Vec<Tag> = Vec::new(); let mut odcl_metadata = HashMap::new();
420 odcl_metadata.insert(
421 "syntax".to_string(),
422 serde_json::Value::String("proto3".to_string()),
423 );
424
425 let table = Table {
426 id: crate::models::table::Table::generate_id(&message.name, None, None, None),
427 name: message.name.clone(),
428 columns,
429 database_type: None,
430 catalog_name: None,
431 schema_name: None,
432 medallion_layers: Vec::new(),
433 scd_pattern: None,
434 data_vault_classification: None,
435 modeling_level: None,
436 tags,
437 odcl_metadata,
438 owner: None,
439 sla: None,
440 contact_details: None,
441 infrastructure_type: None,
442 notes: None,
443 position: None,
444 yaml_file_path: None,
445 drawio_cell_id: None,
446 quality: Vec::new(),
447 errors: Vec::new(),
448 created_at: chrono::Utc::now(),
449 updated_at: chrono::Utc::now(),
450 };
451
452 info!(
453 "Parsed Protobuf message: {} with {} columns",
454 message.name,
455 table.columns.len()
456 );
457 Ok(table)
458 }
459
460 fn map_proto_type_to_sql(&self, proto_type: &str) -> String {
462 match proto_type {
463 "int32" | "int" => "INTEGER".to_string(),
464 "int64" | "long" => "BIGINT".to_string(),
465 "uint32" => "INTEGER".to_string(), "uint64" => "BIGINT".to_string(),
467 "sint32" => "INTEGER".to_string(), "sint64" => "BIGINT".to_string(),
469 "fixed32" => "INTEGER".to_string(), "fixed64" => "BIGINT".to_string(), "sfixed32" => "INTEGER".to_string(), "sfixed64" => "BIGINT".to_string(), "float" => "FLOAT".to_string(),
474 "double" => "DOUBLE".to_string(),
475 "bool" | "boolean" => "BOOLEAN".to_string(),
476 "bytes" => "BYTES".to_string(),
477 "string" => "STRING".to_string(),
478 _ => "STRING".to_string(), }
480 }
481}
482
/// A protobuf `message` as collected by the line parser.
#[derive(Debug, Clone)]
struct Message {
    /// Message name as written after the `message` keyword.
    name: String,
    /// Fields parsed from the message body, in source order.
    fields: Vec<ProtobufField>,
    /// Never read in this file and always initialised empty by the parser, hence the
    /// `dead_code` allowance. NOTE(review): presumably reserved for nested-message
    /// support; confirm before removing.
    #[allow(dead_code)]
    nested_messages: Vec<Message>,
}
491
/// A single field declaration parsed from a message body.
#[derive(Debug, Clone)]
struct ProtobufField {
    /// Field name.
    name: String,
    /// Type name as written in the source: a scalar type or the name of another message.
    field_type: String,
    /// `true` when the field carries the `repeated` label.
    repeated: bool,
    /// `true` when the field is `optional` or `repeated`.
    nullable: bool,
}
500
/// A non-fatal problem recorded while parsing protobuf content.
#[derive(Debug, Clone)]
pub struct ParserError {
    /// Error category; the parser in this file always uses `"parse_error"`.
    pub error_type: String,
    /// Where the problem occurred: a `"line N"` label or a message name, when known.
    pub field: Option<String>,
    /// Human-readable description of the problem.
    pub message: String,
}