use super::{ColumnData, ImportError, ImportResult, TableData};
use crate::validation::input::{validate_column_name, validate_data_type, validate_table_name};
use anyhow::Result;
use once_cell::sync::Lazy;
use regex::Regex;
use sqlparser::ast::{ColumnDef, ColumnOption, ObjectName, Statement, TableConstraint};
use sqlparser::dialect::{
    AnsiDialect, BigQueryDialect, DatabricksDialect as OfficialDatabricksDialect, Dialect,
    GenericDialect, HiveDialect, MsSqlDialect, MySqlDialect, PostgreSqlDialect, SQLiteDialect,
};
use sqlparser::parser::Parser;
use std::collections::HashMap;

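// Precompiled regexes used to pre-process dialect-specific DDL (mainly
// Databricks constructs) into SQL that `sqlparser` can handle.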
static RE_IDENTIFIER: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?i)IDENTIFIER\s*\(\s*([^)]+)\s*\)").expect("Invalid regex"));
static RE_LITERAL: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"(?:'([^']*)'|"([^"]*)")"#).expect("Invalid regex"));
static RE_MATERIALIZED_VIEW: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?i)CREATE\s+MATERIALIZED\s+VIEW").expect("Invalid regex"));
static RE_TABLE_COMMENT_SINGLE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"(?i)\)\s+COMMENT\s+'[^']*'"#).expect("Invalid regex"));
static RE_TABLE_COMMENT_DOUBLE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"(?i)\)\s+COMMENT\s+"[^"]*""#).expect("Invalid regex"));
static RE_TBLPROPERTIES: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?i)TBLPROPERTIES\s*\(").expect("Invalid regex"));
static RE_CLUSTER_BY: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?i)\s+CLUSTER\s+BY\s+(?:AUTO|\([^)]*\)|[\w,\s]+)").expect("Invalid regex")
});
static RE_USING_FORMAT: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?i)\s+USING\s+(?:DELTA|PARQUET|CSV|JSON|ORC|AVRO|TEXT|BINARYFILE|JDBC|ICEBERG)\b")
        .expect("Invalid regex")
});
static RE_VARIABLE_TYPE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r":\s*:([a-zA-Z_][a-zA-Z0-9_]*)").expect("Invalid regex"));
static RE_ARRAY_VARIABLE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"ARRAY\s*<\s*:([a-zA-Z_][a-zA-Z0-9_]*)\s*>").expect("Invalid regex"));
static RE_FIELD_VARIABLE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(\w+)\s+:\w+\s+([A-Z][A-Z0-9_]*(?:<[^>]*>)?)").expect("Invalid regex")
});
static RE_COMPLEX_TYPE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(\w+)\s+(STRUCT<|ARRAY<|MAP<)").expect("Invalid regex"));

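/// A thin wrapper around the official Databricks dialect that additionally
/// accepts `:` inside identifiers, so named parameters such as `:table_name`
/// can be tokenized as identifiers instead of causing a parse error.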
#[derive(Debug)]
struct CustomDatabricksDialect {
    official: OfficialDatabricksDialect,
}

impl CustomDatabricksDialect {
    fn new() -> Self {
        Self {
            official: OfficialDatabricksDialect {},
        }
    }
}

impl Dialect for CustomDatabricksDialect {
    fn is_identifier_start(&self, ch: char) -> bool {
        self.official.is_identifier_start(ch) || ch == ':'
    }

    fn is_identifier_part(&self, ch: char) -> bool {
        self.official.is_identifier_part(ch) || ch == ':'
    }

    fn is_delimited_identifier_start(&self, ch: char) -> bool {
        self.official.is_delimited_identifier_start(ch)
    }
}

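/// State accumulated during preprocessing: currently the mapping from
/// generated `__databricks_table_N__` placeholders back to the original
/// `IDENTIFIER(...)` expressions they replaced.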
#[derive(Debug)]
struct PreprocessingState {
    identifier_replacements: HashMap<String, String>,
}

impl PreprocessingState {
    fn new() -> Self {
        Self {
            identifier_replacements: HashMap::new(),
        }
    }
}

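/// Imports table definitions from SQL DDL scripts.
///
/// Illustrative usage (mirrors the tests at the bottom of this file):
///
/// ```ignore
/// let importer = SQLImporter::new("postgres");
/// let result = importer.parse("CREATE TABLE t (id INT PRIMARY KEY);")?;
/// assert_eq!(result.tables.len(), 1);
/// ```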
pub struct SQLImporter {
    pub dialect: String,
}

impl Default for SQLImporter {
    fn default() -> Self {
        Self {
            dialect: "generic".to_string(),
        }
    }
}

impl SQLImporter {
    pub fn new(dialect: &str) -> Self {
        Self {
            dialect: dialect.to_string(),
        }
    }

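    /// Replaces Databricks `IDENTIFIER(expr)` table references with
    /// `__databricks_table_N__` placeholders that the parser accepts, and
    /// records the original expression so the real name can be recovered
    /// later. For example, `CREATE TABLE IDENTIFIER(:catalog || '.t') (...)`
    /// becomes `CREATE TABLE __databricks_table_1__ (...)`.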
    fn preprocess_identifier_expressions(sql: &str, state: &mut PreprocessingState) -> String {
        let mut counter = 0;

        RE_IDENTIFIER
            .replace_all(sql, |caps: &regex::Captures| {
                let expr = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                counter += 1;
                let placeholder = format!("__databricks_table_{}__", counter);

                state
                    .identifier_replacements
                    .insert(placeholder.clone(), expr.to_string());

                placeholder
            })
            .to_string()
    }

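    /// Extracts a usable table name from an `IDENTIFIER(...)` expression by
    /// concatenating its string literals and trimming stray dots, e.g.
    /// `:catalog || '.schema.table'` yields `schema.table`. Returns `None`
    /// when the expression contains no literals (pure variable references).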
    fn extract_identifier_table_name(expr: &str) -> Option<String> {
        let mut parts = Vec::new();

        for cap in RE_LITERAL.captures_iter(expr) {
            if let Some(m) = cap.get(1) {
                parts.push(m.as_str().to_string());
            } else if let Some(m) = cap.get(2) {
                parts.push(m.as_str().to_string());
            }
        }

        if parts.is_empty() {
            return None;
        }

        let result = parts.join("");
        Some(result.trim_matches('.').to_string())
    }

    fn preprocess_materialized_views(sql: &str) -> String {
        RE_MATERIALIZED_VIEW
            .replace_all(sql, "CREATE VIEW")
            .to_string()
    }

    fn preprocess_table_comment(sql: &str) -> String {
        let result = RE_TABLE_COMMENT_SINGLE.replace_all(sql, ")");
        RE_TABLE_COMMENT_DOUBLE
            .replace_all(&result, ")")
            .to_string()
    }

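    /// Strips `TBLPROPERTIES (...)` clauses, tracking nested parentheses so
    /// property values containing `(` or `)` do not cut the clause short.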
    fn preprocess_tblproperties(sql: &str) -> String {
        let mut result = sql.to_string();

        let mut search_start = 0;
        while let Some(m) = RE_TBLPROPERTIES.find_at(&result, search_start) {
            let start = m.start();
            let mut pos = m.end();
            let mut paren_count = 1;

            let bytes = result.as_bytes();
            while pos < bytes.len() && paren_count > 0 {
                if let Some(ch) = result[pos..].chars().next() {
                    if ch == '(' {
                        paren_count += 1;
                    } else if ch == ')' {
                        paren_count -= 1;
                    }
                    pos += ch.len_utf8();
                } else {
                    break;
                }
            }

            if paren_count == 0 {
                result.replace_range(start..pos, "");
                search_start = start;
            } else {
                search_start = pos;
            }
        }

        result
    }

    fn preprocess_cluster_by(sql: &str) -> String {
        RE_CLUSTER_BY.replace_all(sql, "").to_string()
    }

    fn preprocess_using_format(sql: &str) -> String {
        RE_USING_FORMAT.replace_all(sql, "").to_string()
    }

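    /// Normalizes whitespace so the parser sees a single-line statement:
    /// newlines become single spaces, runs of spaces and tabs outside quotes
    /// collapse to one space, and `--` line comments are removed. Spaces and
    /// tabs inside single- or double-quoted strings are preserved, and escape
    /// sequences and doubled quotes are carried through unchanged.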
    fn normalize_sql_preserving_quotes(sql: &str) -> String {
        let mut result = String::with_capacity(sql.len());
        let mut chars = sql.chars().peekable();
        let mut in_single_quote = false;
        let mut in_double_quote = false;
        let mut last_char_was_space = false;

        while let Some(ch) = chars.next() {
            match ch {
                '\\' if in_single_quote || in_double_quote => {
                    if let Some(&next_ch) = chars.peek() {
                        result.push(ch);
                        result.push(next_ch);
                        chars.next();
                        last_char_was_space = false;
                    } else {
                        result.push(ch);
                        last_char_was_space = false;
                    }
                }
                '\'' if !in_double_quote => {
                    if in_single_quote && chars.peek() == Some(&'\'') {
                        result.push(ch);
                        result.push(ch);
                        chars.next();
                        last_char_was_space = false;
                    } else {
                        in_single_quote = !in_single_quote;
                        result.push(ch);
                        last_char_was_space = false;
                    }
                }
                '"' if !in_single_quote => {
                    if in_double_quote && chars.peek() == Some(&'"') {
                        result.push(ch);
                        result.push(ch);
                        chars.next();
                        last_char_was_space = false;
                    } else {
                        in_double_quote = !in_double_quote;
                        result.push(ch);
                        last_char_was_space = false;
                    }
                }
                '\n' | '\r' => {
                    if in_single_quote || in_double_quote {
                        if !last_char_was_space {
                            result.push(' ');
                            last_char_was_space = true;
                        }
                    } else {
                        if !last_char_was_space {
                            result.push(' ');
                            last_char_was_space = true;
                        }
                    }
                }
                ' ' | '\t' => {
                    if in_single_quote || in_double_quote {
                        result.push(ch);
                        last_char_was_space = false;
                    } else {
                        if !last_char_was_space {
                            result.push(' ');
                            last_char_was_space = true;
                        }
                    }
                }
                '-' if !in_single_quote && !in_double_quote => {
                    if let Some(&'-') = chars.peek() {
                        for c in chars.by_ref() {
                            if c == '\n' || c == '\r' {
                                break;
                            }
                        }
                        if !last_char_was_space {
                            result.push(' ');
                            last_char_was_space = true;
                        }
                    } else {
                        result.push(ch);
                        last_char_was_space = false;
                    }
                }
                _ => {
                    result.push(ch);
                    last_char_was_space = false;
                }
            }
        }

        result.trim().to_string()
    }

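    /// Rewrites backslash-escaped quotes (`\'` and `\"`) inside string
    /// literals into the SQL-standard doubled form (`''` / `""`) that
    /// `sqlparser` understands; doubled backslashes are passed through.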
    fn convert_backslash_escaped_quotes(sql: &str) -> String {
        let mut result = String::with_capacity(sql.len());
        let mut chars = sql.chars().peekable();
        let mut in_single_quote = false;
        let mut in_double_quote = false;

        while let Some(ch) = chars.next() {
            match ch {
                '\\' if (in_single_quote || in_double_quote) => {
                    if let Some(&next_ch) = chars.peek() {
                        match next_ch {
                            '\'' if in_single_quote => {
                                result.push_str("''");
                                chars.next();
                            }
                            '"' if in_double_quote => {
                                result.push_str("\"\"");
                                chars.next();
                            }
                            '\\' => {
                                result.push('\\');
                                result.push('\\');
                                chars.next();
                            }
                            _ => {
                                result.push(ch);
                                result.push(next_ch);
                                chars.next();
                            }
                        }
                    } else {
                        result.push(ch);
                    }
                }
                '\'' if !in_double_quote => {
                    in_single_quote = !in_single_quote;
                    result.push(ch);
                }
                '"' if !in_single_quote => {
                    in_double_quote = !in_double_quote;
                    result.push(ch);
                }
                _ => {
                    result.push(ch);
                }
            }
        }

        result
    }

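    /// Substitutes `STRING` for named-parameter types inside STRUCT fields,
    /// e.g. `value: :variable_type` becomes `value: STRING`, so the
    /// definition stays parseable.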
    fn replace_variables_in_struct_types(sql: &str) -> String {
        RE_VARIABLE_TYPE
            .replace_all(sql, |_caps: &regex::Captures| ": STRING".to_string())
            .to_string()
    }

    fn replace_variables_in_array_types(sql: &str) -> String {
        RE_ARRAY_VARIABLE
            .replace_all(sql, |_caps: &regex::Captures| "ARRAY<STRING>".to_string())
            .to_string()
    }

    fn replace_variables_in_column_definitions(sql: &str) -> String {
        RE_FIELD_VARIABLE
            .replace_all(sql, |caps: &regex::Captures| {
                let col_name = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                let type_name = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                format!("{} {}", col_name, type_name)
            })
            .to_string()
    }

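    /// Repeatedly applies the struct and array variable substitutions until
    /// the SQL stops changing (or a safety limit is hit), so parameters
    /// nested several levels deep inside STRUCT/ARRAY types are also
    /// replaced.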
    fn replace_nested_variables(sql: &str) -> String {
        let mut result = sql.to_string();
        let mut changed = true;
        let mut iterations = 0;
        const MAX_ITERATIONS: usize = 10;

        while changed && iterations < MAX_ITERATIONS {
            let before = result.clone();

            result = Self::replace_variables_in_struct_types(&result);
            result = Self::replace_variables_in_array_types(&result);

            changed = before != result;
            iterations += 1;
        }

        result
    }

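    /// Finds columns declared with complex types (`STRUCT<...>`, `ARRAY<...>`,
    /// `MAP<...>`), records the original type text per column, and replaces
    /// the type with `STRING` in the returned SQL so dialects that cannot
    /// parse the nested form still succeed. The saved types are restored on
    /// the imported columns afterwards.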
    fn extract_complex_type_columns(sql: &str) -> (String, Vec<(String, String)>) {
        let mut column_types = Vec::new();
        let mut result = sql.to_string();

        let mut matches_to_replace: Vec<(usize, usize, String, String)> = Vec::new();

        for cap in RE_COMPLEX_TYPE.captures_iter(sql) {
            let col_name = cap.get(1).map(|m| m.as_str()).unwrap_or("");
            let type_start = cap.get(0).map(|m| m.start()).unwrap_or(0);
            let struct_or_array = cap.get(2).map(|m| m.as_str()).unwrap_or("");

            let bracket_start = type_start + col_name.len() + 1 + struct_or_array.len() - 1;
            let mut bracket_count = 0;
            let mut type_end = bracket_start;

            for (idx, ch) in sql[bracket_start..].char_indices() {
                let pos = bracket_start + idx;
                if ch == '<' {
                    bracket_count += 1;
                } else if ch == '>' {
                    bracket_count -= 1;
                    if bracket_count == 0 {
                        type_end = pos + 1;
                        break;
                    }
                }
            }

            if bracket_count == 0 && type_end > type_start {
                let type_start_pos = type_start + col_name.len() + 1;
                let full_type = sql[type_start_pos..type_end].trim().to_string();
                matches_to_replace.push((
                    type_start_pos,
                    type_end,
                    col_name.to_string(),
                    full_type,
                ));
            }
        }

        for (start, end, col_name, full_type) in matches_to_replace.iter().rev() {
            column_types.push((col_name.clone(), full_type.clone()));
            result.replace_range(*start..*end, "STRING");
        }

        (result, column_types)
    }

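    /// Parses a SQL script into table definitions. Databricks input goes
    /// through the full preprocessing pipeline (IDENTIFIER placeholders,
    /// variable substitution, clause stripping); other dialects get
    /// whitespace normalization and, except for BigQuery and Hive, complex
    /// type extraction. Parse failures are reported in `ImportResult::errors`
    /// rather than returned as `Err`.
    ///
    /// Illustrative call, mirroring the tests below:
    ///
    /// ```ignore
    /// let result = SQLImporter::new("databricks")
    ///     .parse("CREATE TABLE t (id STRING) USING DELTA;")?;
    /// assert_eq!(result.tables[0].name.as_deref(), Some("t"));
    /// ```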
    pub fn parse(&self, sql: &str) -> Result<ImportResult> {
        let (preprocessed_sql, preprocessing_state, complex_types) = if self.dialect.to_lowercase()
            == "databricks"
        {
            let mut state = PreprocessingState::new();
            let mut preprocessed = sql.to_string();

            preprocessed = Self::preprocess_identifier_expressions(&preprocessed, &mut state);
            preprocessed = Self::replace_variables_in_column_definitions(&preprocessed);
            preprocessed = Self::replace_nested_variables(&preprocessed);
            preprocessed = Self::preprocess_materialized_views(&preprocessed);
            preprocessed = Self::preprocess_table_comment(&preprocessed);
            preprocessed = Self::preprocess_tblproperties(&preprocessed);
            preprocessed = Self::preprocess_cluster_by(&preprocessed);
            preprocessed = Self::preprocess_using_format(&preprocessed);
            let normalized = Self::normalize_sql_preserving_quotes(&preprocessed);
            let normalized = Self::convert_backslash_escaped_quotes(&normalized);

            let dialect = self.dialect_impl();
            let parse_result = Parser::parse_sql(dialect.as_ref(), &normalized);

            let (final_sql, complex_cols) = if parse_result.is_err() {
                let (simplified, cols) = Self::extract_complex_type_columns(&normalized);
                (simplified, cols)
            } else {
                let (_, cols) = Self::extract_complex_type_columns(&normalized);
                (normalized, cols)
            };

            (final_sql, state, complex_cols)
        } else if matches!(self.dialect.to_lowercase().as_str(), "bigquery" | "hive") {
            let normalized = Self::normalize_sql_preserving_quotes(sql);
            let normalized = Self::convert_backslash_escaped_quotes(&normalized);
            (normalized, PreprocessingState::new(), Vec::new())
        } else {
            let normalized = Self::normalize_sql_preserving_quotes(sql);
            let normalized = Self::convert_backslash_escaped_quotes(&normalized);
            let (simplified_sql, complex_cols) = Self::extract_complex_type_columns(&normalized);
            (simplified_sql, PreprocessingState::new(), complex_cols)
        };

        let dialect = self.dialect_impl();
        let statements = match Parser::parse_sql(dialect.as_ref(), &preprocessed_sql) {
            Ok(stmts) => stmts,
            Err(e) => {
                return Ok(ImportResult {
                    tables: Vec::new(),
                    tables_requiring_name: Vec::new(),
                    errors: vec![ImportError::ParseError(e.to_string())],
                    ai_suggestions: None,
                });
            }
        };

        let mut tables = Vec::new();
        let mut errors = Vec::new();
        let mut tables_requiring_name = Vec::new();

        for (idx, stmt) in statements.into_iter().enumerate() {
            match stmt {
                Statement::CreateTable(create) => {
                    match self.parse_create_table_with_preprocessing(
                        idx,
                        &create.name,
                        &create.columns,
                        &create.constraints,
                        &preprocessing_state,
                        &complex_types,
                    ) {
                        Ok((table, requires_name)) => {
                            if requires_name {
                                tables_requiring_name.push(super::TableRequiringName {
                                    table_index: idx,
                                    suggested_name: None,
                                });
                            }
                            tables.push(table);
                        }
                        Err(e) => errors.push(ImportError::ParseError(e)),
                    }
                }
                Statement::CreateView(create_view) => {
                    match self.parse_create_view(idx, &create_view.name, &preprocessing_state) {
                        Ok((table, requires_name)) => {
                            if requires_name {
                                tables_requiring_name.push(super::TableRequiringName {
                                    table_index: idx,
                                    suggested_name: None,
                                });
                            }
                            tables.push(table);
                        }
                        Err(e) => errors.push(ImportError::ParseError(e)),
                    }
                }
                _ => {
                    // Statements other than CREATE TABLE / CREATE VIEW are ignored.
                }
            }
        }

        Ok(ImportResult {
            tables,
            tables_requiring_name,
            errors,
            ai_suggestions: None,
        })
    }

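    /// Parses Liquibase "formatted SQL" changelogs by dropping `--` comment
    /// lines (changeset markers and the like) before delegating to
    /// [`Self::parse`].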
    pub fn parse_liquibase(&self, sql: &str) -> Result<ImportResult> {
        let cleaned = sql
            .lines()
            .filter(|l| {
                let t = l.trim_start();
                !t.starts_with("--")
            })
            .collect::<Vec<_>>()
            .join("\n");

        self.parse(&cleaned)
    }

    fn dialect_impl(&self) -> Box<dyn Dialect + Send + Sync> {
        match self.dialect.to_lowercase().as_str() {
            "ansi" => Box::new(AnsiDialect {}),
            "bigquery" => Box::new(BigQueryDialect {}),
            "databricks" => Box::new(CustomDatabricksDialect::new()),
            "hive" => Box::new(HiveDialect {}),
            "mssql" | "sqlserver" => Box::new(MsSqlDialect {}),
            "mysql" => Box::new(MySqlDialect {}),
            "postgres" | "postgresql" => Box::new(PostgreSqlDialect {}),
            "sqlite" => Box::new(SQLiteDialect {}),
            _ => Box::new(GenericDialect {}),
        }
    }

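    /// Removes one layer of identifier quoting: double quotes, backticks, or
    /// square brackets, collapsing doubled quote characters inside them back
    /// to a single occurrence.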
    fn unquote_identifier(identifier: &str) -> String {
        let trimmed = identifier.trim();

        if trimmed.starts_with('"') && trimmed.ends_with('"') && trimmed.len() >= 2 {
            let inner = &trimmed[1..trimmed.len() - 1];
            return inner.replace("\"\"", "\"");
        }

        if trimmed.starts_with('`') && trimmed.ends_with('`') && trimmed.len() >= 2 {
            let inner = &trimmed[1..trimmed.len() - 1];
            return inner.replace("``", "`");
        }

        if trimmed.starts_with('[') && trimmed.ends_with(']') && trimmed.len() >= 2 {
            let inner = &trimmed[1..trimmed.len() - 1];
            return inner.replace("]]", "]");
        }

        trimmed.to_string()
    }

    fn object_name_to_string(name: &ObjectName) -> String {
        let raw_name = name
            .0
            .last()
            .map(|ident| ident.to_string())
            .unwrap_or_else(|| name.to_string());

        Self::unquote_identifier(&raw_name)
    }

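    /// Builds a `TableData` from a parsed CREATE TABLE statement, resolving
    /// `__databricks_table_N__` placeholders back to real names where
    /// possible, collecting primary keys from both column options and table
    /// constraints, and restoring any complex column types captured earlier.
    /// Returns the table plus a flag indicating that the caller must ask the
    /// user for a name (placeholder with no recoverable literal).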
    fn parse_create_table_with_preprocessing(
        &self,
        table_index: usize,
        name: &ObjectName,
        columns: &[ColumnDef],
        constraints: &[TableConstraint],
        preprocessing_state: &PreprocessingState,
        complex_types: &[(String, String)],
    ) -> std::result::Result<(TableData, bool), String> {
        let mut table_name = Self::object_name_to_string(name);
        let mut requires_name = false;

        if table_name.starts_with("__databricks_table_")
            && let Some(original_expr) =
                preprocessing_state.identifier_replacements.get(&table_name)
        {
            if let Some(extracted_name) = Self::extract_identifier_table_name(original_expr) {
                table_name = extracted_name;
            } else {
                requires_name = true;
            }
        }

        if let Err(e) = validate_table_name(&table_name) {
            tracing::warn!("Table name validation warning: {}", e);
        }

        let mut pk_cols = std::collections::HashSet::<String>::new();
        for c in constraints {
            if let TableConstraint::PrimaryKey(pk_constraint) = c {
                for col in &pk_constraint.columns {
                    pk_cols.insert(Self::unquote_identifier(&col.to_string()));
                }
            }
        }

        let mut out_cols = Vec::new();
        for col in columns {
            let mut nullable = true;
            let mut is_pk = false;

            for opt_def in &col.options {
                match &opt_def.option {
                    ColumnOption::NotNull => nullable = false,
                    ColumnOption::Null => nullable = true,
                    ColumnOption::Unique(_) => {}
                    ColumnOption::PrimaryKey(_) => {
                        is_pk = true;
                    }
                    _ => {}
                }
            }

            let col_name = Self::unquote_identifier(&col.name.value);

            if pk_cols.contains(&col_name) {
                is_pk = true;
            }

            let mut data_type = col.data_type.to_string();
            let mut description = None;

            for opt_def in &col.options {
                if let ColumnOption::Comment(comment) = &opt_def.option {
                    description = Some(comment.clone());
                }
            }

            if let Some((_, original_type)) =
                complex_types.iter().find(|(name, _)| name == &col_name)
            {
                data_type = original_type.clone();
            }

            if let Err(e) = validate_column_name(&col_name) {
                tracing::warn!("Column name validation warning for '{}': {}", col_name, e);
            }
            if let Err(e) = validate_data_type(&data_type) {
                tracing::warn!("Data type validation warning for '{}': {}", data_type, e);
            }

            out_cols.push(ColumnData {
                name: col_name,
                data_type,
                nullable,
                primary_key: is_pk,
                description,
                ..Default::default()
            });
        }

        Ok((
            TableData {
                table_index,
                id: None,
                name: Some(table_name),
                columns: out_cols,
                ..Default::default()
            },
            requires_name,
        ))
    }

    fn parse_create_view(
        &self,
        view_index: usize,
        name: &ObjectName,
        preprocessing_state: &PreprocessingState,
    ) -> std::result::Result<(TableData, bool), String> {
        let mut view_name = Self::object_name_to_string(name);
        let mut requires_name = false;

        if view_name.starts_with("__databricks_table_")
            && let Some(original_expr) = preprocessing_state.identifier_replacements.get(&view_name)
        {
            if let Some(extracted_name) = Self::extract_identifier_table_name(original_expr) {
                view_name = extracted_name;
            } else {
                requires_name = true;
            }
        }

        if let Err(e) = validate_table_name(&view_name) {
            tracing::warn!("View name validation warning: {}", e);
        }

        Ok((
            TableData {
                table_index: view_index,
                id: None,
                name: Some(view_name),
                columns: Vec::new(),
                ..Default::default()
            },
            requires_name,
        ))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sql_importer_default() {
        let importer = SQLImporter::default();
        assert_eq!(importer.dialect, "generic");
    }

    #[test]
    fn test_sql_importer_parse_basic() {
        let importer = SQLImporter::new("postgres");
        let result = importer
            .parse("CREATE TABLE test (id INT PRIMARY KEY, name TEXT NOT NULL);")
            .unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        let t = &result.tables[0];
        assert_eq!(t.name.as_deref(), Some("test"));
        assert_eq!(t.columns.len(), 2);
        assert!(t.columns.iter().any(|c| c.name == "id" && c.primary_key));
        assert!(t.columns.iter().any(|c| c.name == "name" && !c.nullable));
    }

    #[test]
    fn test_sql_importer_parse_table_pk_constraint() {
        let importer = SQLImporter::new("postgres");
        let result = importer
            .parse("CREATE TABLE t (id INT, name TEXT, CONSTRAINT pk PRIMARY KEY (id));")
            .unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        let t = &result.tables[0];
        assert!(t.columns.iter().any(|c| c.name == "id" && c.primary_key));
    }

    #[test]
    fn test_sql_importer_parse_liquibase_formatted_sql() {
        let importer = SQLImporter::new("postgres");
        let result = importer
            .parse_liquibase(
                "--liquibase formatted sql\n--changeset user:1\nCREATE TABLE test (id INT);\n",
            )
            .unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
    }

    #[test]
    fn test_databricks_identifier_with_literal() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE IDENTIFIER('test_table') (id STRING);";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].name.as_deref(), Some("test_table"));
    }

    #[test]
    fn test_databricks_identifier_with_variable() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE IDENTIFIER(:table_name) (id STRING);";
        let result = importer.parse(sql).unwrap();
        assert_eq!(result.tables.len(), 1);
        assert!(
            result.tables[0]
                .name
                .as_deref()
                .unwrap()
                .starts_with("__databricks_table_")
        );
        assert_eq!(result.tables_requiring_name.len(), 1);
    }

    #[test]
    fn test_databricks_identifier_with_concatenation() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE IDENTIFIER(:catalog || '.schema.table') (id STRING);";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].name.as_deref(), Some("schema.table"));
    }

    #[test]
    fn test_databricks_variable_in_struct() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE example (metadata STRUCT<key: STRING, value: :variable_type, timestamp: TIMESTAMP>);";
        let result = importer.parse(sql).unwrap();
        if !result.errors.is_empty() {
            eprintln!("Parse errors: {:?}", result.errors);
        }
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert!(
            result.tables[0].columns[0]
                .data_type
                .contains("value: STRING")
        );
    }

    #[test]
    fn test_databricks_variable_in_array() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE example (items ARRAY<:element_type>);";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].columns[0].data_type, "ARRAY<STRING>");
    }

    #[test]
    fn test_databricks_nested_variables() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE example (events ARRAY<STRUCT<id: STRING, name: STRING, details: STRUCT<name: STRING, status: :variable_type, timestamp: TIMESTAMP>>>);";
        let result = importer.parse(sql).unwrap();
        if !result.errors.is_empty() {
            eprintln!("Parse errors: {:?}", result.errors);
        }
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert!(
            result.tables[0].columns[0]
                .data_type
                .contains("status: STRING")
        );
    }

    #[test]
    fn test_databricks_comment_variable() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE example (id STRING) COMMENT ':comment_variable';";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
    }

    #[test]
    fn test_databricks_tblproperties_variable() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE example (id STRING) TBLPROPERTIES ('key1' = ':variable_value', 'key2' = 'static_value');";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
    }

    #[test]
    fn test_databricks_column_variable() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE example (id :id_var STRING, name :name_var STRING);";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].columns.len(), 2);
    }

    #[test]
    fn test_databricks_create_view() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE VIEW example_view AS SELECT id, name FROM source_table;";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].name.as_deref(), Some("example_view"));
    }

    #[test]
    fn test_databricks_view_with_identifier() {
        let importer = SQLImporter::new("databricks");
        let sql =
            "CREATE VIEW IDENTIFIER(:catalog || '.schema.view_name') AS SELECT * FROM table1;";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].name.as_deref(), Some("schema.view_name"));
    }

    #[test]
    fn test_databricks_create_materialized_view() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE MATERIALIZED VIEW mv_example AS SELECT id, name FROM source_table;";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].name.as_deref(), Some("mv_example"));
    }

    #[test]
    fn test_databricks_using_delta() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE delta_table (id INT, name STRING) USING DELTA;";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty(), "Errors: {:?}", result.errors);
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].name.as_deref(), Some("delta_table"));
        assert_eq!(result.tables[0].columns.len(), 2);
    }

    #[test]
    fn test_databricks_using_parquet() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE parquet_table (id INT, data STRING) USING PARQUET;";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty(), "Errors: {:?}", result.errors);
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].name.as_deref(), Some("parquet_table"));
    }

    #[test]
    fn test_databricks_using_delta_with_tblproperties() {
        let importer = SQLImporter::new("databricks");
        let sql = r#"CREATE TABLE complex_table (
            id INT,
            name STRING
        ) USING DELTA
        TBLPROPERTIES ('delta.autoOptimize.optimizeWrite' = 'true');"#;
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty(), "Errors: {:?}", result.errors);
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].name.as_deref(), Some("complex_table"));
        assert_eq!(result.tables[0].columns.len(), 2);
    }

    #[test]
    fn test_databricks_materialized_view_using_delta() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE MATERIALIZED VIEW mv_delta AS SELECT id FROM source;";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty(), "Errors: {:?}", result.errors);
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].name.as_deref(), Some("mv_delta"));
    }
}
1292}