//! SQL DDL importer: parses `CREATE TABLE` / `CREATE VIEW` statements with
//! sqlparser and converts them into table metadata, including extra
//! preprocessing for Databricks SQL (`IDENTIFIER()` targets, `:variable`
//! placeholders, table comments, TBLPROPERTIES, CLUSTER BY, materialized views).

use super::{ColumnData, ImportError, ImportResult, TableData};
use crate::validation::input::{validate_column_name, validate_data_type, validate_table_name};
use anyhow::Result;
use once_cell::sync::Lazy;
use regex::Regex;
use sqlparser::ast::{ColumnDef, ColumnOption, ObjectName, Statement, TableConstraint};
use sqlparser::dialect::{
    AnsiDialect, BigQueryDialect, DatabricksDialect as OfficialDatabricksDialect, Dialect,
    GenericDialect, HiveDialect, MsSqlDialect, MySqlDialect, PostgreSqlDialect, SQLiteDialect,
};
use sqlparser::parser::Parser;
use std::collections::HashMap;

static RE_IDENTIFIER: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?i)IDENTIFIER\s*\(\s*([^)]+)\s*\)").expect("Invalid regex"));
static RE_LITERAL: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"(?:'([^']*)'|"([^"]*)")"#).expect("Invalid regex"));
static RE_MATERIALIZED_VIEW: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?i)CREATE\s+MATERIALIZED\s+VIEW").expect("Invalid regex"));
static RE_TABLE_COMMENT_SINGLE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"(?i)\)\s+COMMENT\s+'[^']*'"#).expect("Invalid regex"));
static RE_TABLE_COMMENT_DOUBLE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"(?i)\)\s+COMMENT\s+"[^"]*""#).expect("Invalid regex"));
static RE_TBLPROPERTIES: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?i)TBLPROPERTIES\s*\(").expect("Invalid regex"));
static RE_CLUSTER_BY: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?i)\s+CLUSTER\s+BY\s+(?:AUTO|\([^)]*\)|[\w,\s]+)").expect("Invalid regex")
});
static RE_VARIABLE_TYPE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r":\s*:([a-zA-Z_][a-zA-Z0-9_]*)").expect("Invalid regex"));
static RE_ARRAY_VARIABLE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"ARRAY\s*<\s*:([a-zA-Z_][a-zA-Z0-9_]*)\s*>").expect("Invalid regex"));
static RE_FIELD_VARIABLE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(\w+)\s+:\w+\s+([A-Z][A-Z0-9_]*(?:<[^>]*>)?)").expect("Invalid regex")
});
static RE_COMPLEX_TYPE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(\w+)\s+(STRUCT<|ARRAY<|MAP<)").expect("Invalid regex"));

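/// Wrapper around sqlparser's official Databricks dialect that additionally
/// accepts ':' as an identifier character, so Databricks SQL named parameters
/// such as `:table_name` tokenize instead of aborting the parse.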
#[derive(Debug)]
struct CustomDatabricksDialect {
    official: OfficialDatabricksDialect,
}

impl CustomDatabricksDialect {
    fn new() -> Self {
        Self {
            official: OfficialDatabricksDialect {},
        }
    }
}

impl Dialect for CustomDatabricksDialect {
    fn is_identifier_start(&self, ch: char) -> bool {
        self.official.is_identifier_start(ch) || ch == ':'
    }

    fn is_identifier_part(&self, ch: char) -> bool {
        self.official.is_identifier_part(ch) || ch == ':'
    }

    fn is_delimited_identifier_start(&self, ch: char) -> bool {
        self.official.is_delimited_identifier_start(ch)
    }
}

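/// State collected while preprocessing Databricks SQL: maps generated
/// `__databricks_table_N__` placeholders back to the original
/// `IDENTIFIER(...)` expressions they replaced.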
#[derive(Debug)]
struct PreprocessingState {
    identifier_replacements: HashMap<String, String>,
}

impl PreprocessingState {
    fn new() -> Self {
        Self {
            identifier_replacements: HashMap::new(),
        }
    }
}

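/// Imports SQL DDL (`CREATE TABLE` / `CREATE VIEW` statements) and converts it
/// into [`TableData`] descriptions, with additional preprocessing for
/// Databricks-flavoured SQL.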
pub struct SQLImporter {
    /// Name of the SQL dialect used for parsing (see `dialect_impl`).
    pub dialect: String,
}

impl Default for SQLImporter {
    fn default() -> Self {
        Self {
            dialect: "generic".to_string(),
        }
    }
}

impl SQLImporter {
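    /// Creates an importer for the given dialect name. Recognized values
    /// include "ansi", "bigquery", "databricks", "hive", "mssql"/"sqlserver",
    /// "mysql", "postgres"/"postgresql" and "sqlite"; any other value falls
    /// back to the generic dialect (see `dialect_impl`).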
    pub fn new(dialect: &str) -> Self {
        Self {
            dialect: dialect.to_string(),
        }
    }

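    /// Replaces Databricks `IDENTIFIER(expr)` table references with unique
    /// `__databricks_table_N__` placeholders the parser can accept, recording
    /// each original expression in `state` so the real name can be recovered
    /// after parsing.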
    fn preprocess_identifier_expressions(sql: &str, state: &mut PreprocessingState) -> String {
        let mut counter = 0;

        RE_IDENTIFIER
            .replace_all(sql, |caps: &regex::Captures| {
                let expr = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                counter += 1;
                let placeholder = format!("__databricks_table_{}__", counter);

                state
                    .identifier_replacements
                    .insert(placeholder.clone(), expr.to_string());

                placeholder
            })
            .to_string()
    }

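    /// Recovers a table name from the expression inside `IDENTIFIER(...)` by
    /// concatenating its string literals; `:catalog || '.schema.table'`
    /// yields `schema.table`. Returns `None` if the expression contains no
    /// literals at all.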
    fn extract_identifier_table_name(expr: &str) -> Option<String> {
        let mut parts = Vec::new();

        for cap in RE_LITERAL.captures_iter(expr) {
            if let Some(m) = cap.get(1) {
                parts.push(m.as_str().to_string());
            } else if let Some(m) = cap.get(2) {
                parts.push(m.as_str().to_string());
            }
        }

        if parts.is_empty() {
            return None;
        }

        let result = parts.join("");
        Some(result.trim_matches('.').to_string())
    }

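    /// Rewrites `CREATE MATERIALIZED VIEW` as plain `CREATE VIEW` so the
    /// statement parses like an ordinary view.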
    fn preprocess_materialized_views(sql: &str) -> String {
        RE_MATERIALIZED_VIEW
            .replace_all(sql, "CREATE VIEW")
            .to_string()
    }

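    /// Strips a table-level `COMMENT '...'` / `COMMENT "..."` clause that
    /// follows the closing parenthesis of the column list.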
    fn preprocess_table_comment(sql: &str) -> String {
        let result = RE_TABLE_COMMENT_SINGLE.replace_all(sql, ")");
        RE_TABLE_COMMENT_DOUBLE
            .replace_all(&result, ")")
            .to_string()
    }

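    /// Removes `TBLPROPERTIES (...)` clauses, balancing nested parentheses so
    /// parenthesized property values do not cut the removal short.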
    fn preprocess_tblproperties(sql: &str) -> String {
        let mut result = sql.to_string();

        let mut search_start = 0;
        while let Some(m) = RE_TBLPROPERTIES.find_at(&result, search_start) {
            let start = m.start();
            let mut pos = m.end();
            let mut paren_count = 1;

            let bytes = result.as_bytes();
            while pos < bytes.len() && paren_count > 0 {
                if let Some(ch) = result[pos..].chars().next() {
                    if ch == '(' {
                        paren_count += 1;
                    } else if ch == ')' {
                        paren_count -= 1;
                    }
                    pos += ch.len_utf8();
                } else {
                    break;
                }
            }

            if paren_count == 0 {
                result.replace_range(start..pos, "");
                search_start = start;
            } else {
                search_start = pos;
            }
        }

        result
    }

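    /// Strips `CLUSTER BY AUTO` / `CLUSTER BY (...)` / `CLUSTER BY col, ...`
    /// clauses before parsing.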
    fn preprocess_cluster_by(sql: &str) -> String {
        RE_CLUSTER_BY.replace_all(sql, "").to_string()
    }

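    /// Flattens the SQL onto a single line: collapses whitespace outside of
    /// string literals, strips `--` line comments, and preserves spacing inside
    /// quotes (newlines are replaced with a space even inside quotes).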
    fn normalize_sql_preserving_quotes(sql: &str) -> String {
        let mut result = String::with_capacity(sql.len());
        let mut chars = sql.chars().peekable();
        let mut in_single_quote = false;
        let mut in_double_quote = false;
        let mut last_char_was_space = false;

        while let Some(ch) = chars.next() {
            match ch {
                '\\' if in_single_quote || in_double_quote => {
                    // Keep backslash escapes inside string literals verbatim.
                    if let Some(&next_ch) = chars.peek() {
                        result.push(ch);
                        result.push(next_ch);
                        chars.next();
                        last_char_was_space = false;
                    } else {
                        result.push(ch);
                        last_char_was_space = false;
                    }
                }
                '\'' if !in_double_quote => {
                    if in_single_quote && chars.peek() == Some(&'\'') {
                        // Doubled quote ('') inside a single-quoted string.
                        result.push(ch);
                        result.push(ch);
                        chars.next();
                        last_char_was_space = false;
                    } else {
                        in_single_quote = !in_single_quote;
                        result.push(ch);
                        last_char_was_space = false;
                    }
                }
                '"' if !in_single_quote => {
                    if in_double_quote && chars.peek() == Some(&'"') {
                        // Doubled quote ("") inside a double-quoted string.
                        result.push(ch);
                        result.push(ch);
                        chars.next();
                        last_char_was_space = false;
                    } else {
                        in_double_quote = !in_double_quote;
                        result.push(ch);
                        last_char_was_space = false;
                    }
                }
                '\n' | '\r' => {
                    // Newlines collapse to a single space, inside or outside quotes.
                    if in_single_quote || in_double_quote {
                        if !last_char_was_space {
                            result.push(' ');
                            last_char_was_space = true;
                        }
                    } else {
                        if !last_char_was_space {
                            result.push(' ');
                            last_char_was_space = true;
                        }
                    }
                }
                ' ' | '\t' => {
                    if in_single_quote || in_double_quote {
                        // Preserve spacing inside string literals.
                        result.push(ch);
                        last_char_was_space = false;
                    } else {
                        if !last_char_was_space {
                            result.push(' ');
                            last_char_was_space = true;
                        }
                    }
                }
                '-' if !in_single_quote && !in_double_quote => {
                    if let Some(&'-') = chars.peek() {
                        // Skip a `--` line comment up to the end of the line.
                        for c in chars.by_ref() {
                            if c == '\n' || c == '\r' {
                                break;
                            }
                        }
                        if !last_char_was_space {
                            result.push(' ');
                            last_char_was_space = true;
                        }
                    } else {
                        result.push(ch);
                        last_char_was_space = false;
                    }
                }
                _ => {
                    result.push(ch);
                    last_char_was_space = false;
                }
            }
        }

        result.trim().to_string()
    }

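    /// Rewrites backslash-escaped quotes inside string literals (`\'`, `\"`)
    /// into the doubled form (`''`, `""`) expected by the parser, leaving `\\`
    /// sequences unchanged.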
    fn convert_backslash_escaped_quotes(sql: &str) -> String {
        let mut result = String::with_capacity(sql.len());
        let mut chars = sql.chars().peekable();
        let mut in_single_quote = false;
        let mut in_double_quote = false;

        while let Some(ch) = chars.next() {
            match ch {
                '\\' if (in_single_quote || in_double_quote) => {
                    if let Some(&next_ch) = chars.peek() {
                        match next_ch {
                            '\'' if in_single_quote => {
                                result.push_str("''");
                                chars.next();
                            }
                            '"' if in_double_quote => {
                                result.push_str("\"\"");
                                chars.next();
                            }
                            '\\' => {
                                result.push('\\');
                                result.push('\\');
                                chars.next();
                            }
                            _ => {
                                result.push(ch);
                                result.push(next_ch);
                                chars.next();
                            }
                        }
                    } else {
                        result.push(ch);
                    }
                }
                '\'' if !in_double_quote => {
                    in_single_quote = !in_single_quote;
                    result.push(ch);
                }
                '"' if !in_single_quote => {
                    in_double_quote = !in_double_quote;
                    result.push(ch);
                }
                _ => {
                    result.push(ch);
                }
            }
        }

        result
    }

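    /// Replaces `: :variable` type placeholders inside STRUCT field lists with
    /// `: STRING` (e.g. `value: :variable_type` becomes `value: STRING`).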
    fn replace_variables_in_struct_types(sql: &str) -> String {
        RE_VARIABLE_TYPE
            .replace_all(sql, |_caps: &regex::Captures| ": STRING".to_string())
            .to_string()
    }

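    /// Replaces `ARRAY<:variable>` element types with `ARRAY<STRING>`.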
    fn replace_variables_in_array_types(sql: &str) -> String {
        RE_ARRAY_VARIABLE
            .replace_all(sql, |_caps: &regex::Captures| "ARRAY<STRING>".to_string())
            .to_string()
    }

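    /// Drops a `:variable` token between a column name and its data type,
    /// e.g. `id :id_var STRING` becomes `id STRING`.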
    fn replace_variables_in_column_definitions(sql: &str) -> String {
        RE_FIELD_VARIABLE
            .replace_all(sql, |caps: &regex::Captures| {
                let col_name = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                let type_name = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                format!("{} {}", col_name, type_name)
            })
            .to_string()
    }

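    /// Applies the struct and array variable replacements repeatedly (up to
    /// `MAX_ITERATIONS` passes) so variables nested inside `ARRAY<STRUCT<...>>`
    /// types are also resolved.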
    fn replace_nested_variables(sql: &str) -> String {
        let mut result = sql.to_string();
        let mut changed = true;
        let mut iterations = 0;
        const MAX_ITERATIONS: usize = 10;

        while changed && iterations < MAX_ITERATIONS {
            let before = result.clone();

            result = Self::replace_variables_in_struct_types(&result);
            result = Self::replace_variables_in_array_types(&result);

            changed = before != result;
            iterations += 1;
        }

        result
    }

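    /// Finds columns declared with complex types (`STRUCT<...>`, `ARRAY<...>`,
    /// `MAP<...>`), substitutes `STRING` for them in the SQL, and returns the
    /// simplified SQL together with `(column, original_type)` pairs so the
    /// original types can be restored on the parsed columns.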
    fn extract_complex_type_columns(sql: &str) -> (String, Vec<(String, String)>) {
        let mut column_types = Vec::new();
        let mut result = sql.to_string();

        let mut matches_to_replace: Vec<(usize, usize, String, String)> = Vec::new();

        for cap in RE_COMPLEX_TYPE.captures_iter(sql) {
            let col_name = cap.get(1).map(|m| m.as_str()).unwrap_or("");
            let type_start = cap.get(0).map(|m| m.start()).unwrap_or(0);
            let struct_or_array = cap.get(2).map(|m| m.as_str()).unwrap_or("");

            // Index of the opening '<' (the SQL has been whitespace-normalized,
            // so a single space separates the column name from its type).
            let bracket_start = type_start + col_name.len() + 1 + struct_or_array.len() - 1;
            let mut bracket_count = 0;
            let mut type_end = bracket_start;

            for (idx, ch) in sql[bracket_start..].char_indices() {
                let pos = bracket_start + idx;
                if ch == '<' {
                    bracket_count += 1;
                } else if ch == '>' {
                    bracket_count -= 1;
                    if bracket_count == 0 {
                        type_end = pos + 1;
                        break;
                    }
                }
            }

            if bracket_count == 0 && type_end > type_start {
                let type_start_pos = type_start + col_name.len() + 1;
                let full_type = sql[type_start_pos..type_end].trim().to_string();
                matches_to_replace.push((
                    type_start_pos,
                    type_end,
                    col_name.to_string(),
                    full_type,
                ));
            }
        }

        // Replace from the end so earlier byte offsets stay valid.
        for (start, end, col_name, full_type) in matches_to_replace.iter().rev() {
            column_types.push((col_name.clone(), full_type.clone()));
            result.replace_range(*start..*end, "STRING");
        }

        (result, column_types)
    }

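    /// Parses one or more DDL statements and returns the extracted tables, the
    /// tables that still need a user-supplied name (e.g. a purely variable
    /// `IDENTIFIER(:name)` target), and any per-statement parse errors.
    ///
    /// For the "databricks" dialect the SQL first runs through the
    /// preprocessing pipeline above; for other dialects it is only normalized
    /// and, except for BigQuery/Hive, has complex column types simplified.
    ///
    /// ```ignore
    /// // Illustrative usage; module paths depend on the surrounding crate.
    /// let importer = SQLImporter::new("postgres");
    /// let result = importer.parse("CREATE TABLE t (id INT PRIMARY KEY);")?;
    /// assert_eq!(result.tables.len(), 1);
    /// ```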
    pub fn parse(&self, sql: &str) -> Result<ImportResult> {
        let (preprocessed_sql, preprocessing_state, complex_types) = if self.dialect.to_lowercase()
            == "databricks"
        {
            let mut state = PreprocessingState::new();
            let mut preprocessed = sql.to_string();

            // Databricks-specific preprocessing pipeline.
            preprocessed = Self::preprocess_identifier_expressions(&preprocessed, &mut state);
            preprocessed = Self::replace_variables_in_column_definitions(&preprocessed);
            preprocessed = Self::replace_nested_variables(&preprocessed);
            preprocessed = Self::preprocess_materialized_views(&preprocessed);
            preprocessed = Self::preprocess_table_comment(&preprocessed);
            preprocessed = Self::preprocess_tblproperties(&preprocessed);
            preprocessed = Self::preprocess_cluster_by(&preprocessed);

            let normalized = Self::normalize_sql_preserving_quotes(&preprocessed);
            let normalized = Self::convert_backslash_escaped_quotes(&normalized);

            let dialect = self.dialect_impl();
            let parse_result = Parser::parse_sql(dialect.as_ref(), &normalized);

            // Only simplify complex column types if the first parse attempt
            // failed; either way, remember the original types for later.
            let (final_sql, complex_cols) = if parse_result.is_err() {
                let (simplified, cols) = Self::extract_complex_type_columns(&normalized);
                (simplified, cols)
            } else {
                let (_, cols) = Self::extract_complex_type_columns(&normalized);
                (normalized, cols)
            };

            (final_sql, state, complex_cols)
        } else if matches!(self.dialect.to_lowercase().as_str(), "bigquery" | "hive") {
            let normalized = Self::normalize_sql_preserving_quotes(sql);
            let normalized = Self::convert_backslash_escaped_quotes(&normalized);
            (normalized, PreprocessingState::new(), Vec::new())
        } else {
            let normalized = Self::normalize_sql_preserving_quotes(sql);
            let normalized = Self::convert_backslash_escaped_quotes(&normalized);
            let (simplified_sql, complex_cols) = Self::extract_complex_type_columns(&normalized);
            (simplified_sql, PreprocessingState::new(), complex_cols)
        };

        let dialect = self.dialect_impl();
        let statements = match Parser::parse_sql(dialect.as_ref(), &preprocessed_sql) {
            Ok(stmts) => stmts,
            Err(e) => {
                return Ok(ImportResult {
                    tables: Vec::new(),
                    tables_requiring_name: Vec::new(),
                    errors: vec![ImportError::ParseError(e.to_string())],
                    ai_suggestions: None,
                });
            }
        };

        let mut tables = Vec::new();
        let mut errors = Vec::new();
        let mut tables_requiring_name = Vec::new();

        for (idx, stmt) in statements.into_iter().enumerate() {
            match stmt {
                Statement::CreateTable(create) => {
                    match self.parse_create_table_with_preprocessing(
                        idx,
                        &create.name,
                        &create.columns,
                        &create.constraints,
                        &preprocessing_state,
                        &complex_types,
                    ) {
                        Ok((table, requires_name)) => {
                            if requires_name {
                                tables_requiring_name.push(super::TableRequiringName {
                                    table_index: idx,
                                    suggested_name: None,
                                });
                            }
                            tables.push(table);
                        }
                        Err(e) => errors.push(ImportError::ParseError(e)),
                    }
                }
                Statement::CreateView(create_view) => {
                    match self.parse_create_view(idx, &create_view.name, &preprocessing_state) {
                        Ok((table, requires_name)) => {
                            if requires_name {
                                tables_requiring_name.push(super::TableRequiringName {
                                    table_index: idx,
                                    suggested_name: None,
                                });
                            }
                            tables.push(table);
                        }
                        Err(e) => errors.push(ImportError::ParseError(e)),
                    }
                }
                _ => {
                    // Statements other than CREATE TABLE / CREATE VIEW are ignored.
                }
            }
        }

        Ok(ImportResult {
            tables,
            tables_requiring_name,
            errors,
            ai_suggestions: None,
        })
    }

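    /// Parses Liquibase "formatted SQL" by dropping `--` comment lines (which
    /// carry the Liquibase directives) and delegating to `parse`.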
    pub fn parse_liquibase(&self, sql: &str) -> Result<ImportResult> {
        // Drop full-line `--` comments before handing the SQL to the parser.
        let cleaned = sql
            .lines()
            .filter(|l| !l.trim_start().starts_with("--"))
            .collect::<Vec<_>>()
            .join("\n");

        self.parse(&cleaned)
    }

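    /// Maps the configured dialect name to a sqlparser `Dialect`
    /// implementation, defaulting to `GenericDialect` for unknown names.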
    fn dialect_impl(&self) -> Box<dyn Dialect + Send + Sync> {
        match self.dialect.to_lowercase().as_str() {
            "ansi" => Box::new(AnsiDialect {}),
            "bigquery" => Box::new(BigQueryDialect {}),
            "databricks" => Box::new(CustomDatabricksDialect::new()),
            "hive" => Box::new(HiveDialect {}),
            "mssql" | "sqlserver" => Box::new(MsSqlDialect {}),
            "mysql" => Box::new(MySqlDialect {}),
            "postgres" | "postgresql" => Box::new(PostgreSqlDialect {}),
            "sqlite" => Box::new(SQLiteDialect {}),
            _ => Box::new(GenericDialect {}),
        }
    }

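    /// Removes surrounding `"double"`, `` `backtick` `` or `[bracket]` quoting
    /// from an identifier and un-doubles the escaped quote characters inside.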
    fn unquote_identifier(identifier: &str) -> String {
        let trimmed = identifier.trim();

        if trimmed.starts_with('"') && trimmed.ends_with('"') && trimmed.len() >= 2 {
            let inner = &trimmed[1..trimmed.len() - 1];
            return inner.replace("\"\"", "\"");
        }

        if trimmed.starts_with('`') && trimmed.ends_with('`') && trimmed.len() >= 2 {
            let inner = &trimmed[1..trimmed.len() - 1];
            return inner.replace("``", "`");
        }

        if trimmed.starts_with('[') && trimmed.ends_with(']') && trimmed.len() >= 2 {
            let inner = &trimmed[1..trimmed.len() - 1];
            return inner.replace("]]", "]");
        }

        trimmed.to_string()
    }

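    /// Returns the last (unqualified) segment of a possibly qualified object
    /// name such as `catalog.schema.table`, with any quoting removed.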
    fn object_name_to_string(name: &ObjectName) -> String {
        let raw_name = name
            .0
            .last()
            .map(|ident| ident.to_string())
            .unwrap_or_else(|| name.to_string());

        Self::unquote_identifier(&raw_name)
    }

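    /// Builds [`TableData`] from a parsed `CREATE TABLE` statement: resolves
    /// `IDENTIFIER(...)` placeholders back to real names where possible,
    /// restores complex column types captured during preprocessing, and
    /// collects primary-key, nullability and comment information. The returned
    /// boolean is `true` when the table still needs a user-supplied name.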
    fn parse_create_table_with_preprocessing(
        &self,
        table_index: usize,
        name: &ObjectName,
        columns: &[ColumnDef],
        constraints: &[TableConstraint],
        preprocessing_state: &PreprocessingState,
        complex_types: &[(String, String)],
    ) -> std::result::Result<(TableData, bool), String> {
        let mut table_name = Self::object_name_to_string(name);
        let mut requires_name = false;

        // Try to map an IDENTIFIER(...) placeholder back to a concrete name.
        if table_name.starts_with("__databricks_table_")
            && let Some(original_expr) =
                preprocessing_state.identifier_replacements.get(&table_name)
        {
            if let Some(extracted_name) = Self::extract_identifier_table_name(original_expr) {
                table_name = extracted_name;
            } else {
                requires_name = true;
            }
        }

        if let Err(e) = validate_table_name(&table_name) {
            tracing::warn!("Table name validation warning: {}", e);
        }

        // Collect columns named in table-level PRIMARY KEY constraints.
        let mut pk_cols = std::collections::HashSet::<String>::new();
        for c in constraints {
            if let TableConstraint::PrimaryKey(pk_constraint) = c {
                for col in &pk_constraint.columns {
                    pk_cols.insert(Self::unquote_identifier(&col.to_string()));
                }
            }
        }

        let mut out_cols = Vec::new();
        for col in columns {
            let mut nullable = true;
            let mut is_pk = false;

            for opt_def in &col.options {
                match &opt_def.option {
                    ColumnOption::NotNull => nullable = false,
                    ColumnOption::Null => nullable = true,
                    ColumnOption::Unique(_) => {
                        // Plain UNIQUE affects neither nullability nor the primary key.
                    }
                    ColumnOption::PrimaryKey(_) => {
                        is_pk = true;
                    }
                    _ => {}
                }
            }

            let col_name = Self::unquote_identifier(&col.name.value);

            if pk_cols.contains(&col_name) {
                is_pk = true;
            }

            let mut data_type = col.data_type.to_string();
            let mut description = None;

            for opt_def in &col.options {
                if let ColumnOption::Comment(comment) = &opt_def.option {
                    description = Some(comment.clone());
                }
            }

            // Restore the original complex type captured before simplification.
            if let Some((_, original_type)) =
                complex_types.iter().find(|(name, _)| name == &col_name)
            {
                data_type = original_type.clone();
            }

            if let Err(e) = validate_column_name(&col_name) {
                tracing::warn!("Column name validation warning for '{}': {}", col_name, e);
            }
            if let Err(e) = validate_data_type(&data_type) {
                tracing::warn!("Data type validation warning for '{}': {}", data_type, e);
            }

            out_cols.push(ColumnData {
                name: col_name,
                data_type,
                nullable,
                primary_key: is_pk,
                description,
                ..Default::default()
            });
        }

        Ok((
            TableData {
                table_index,
                name: Some(table_name),
                columns: out_cols,
            },
            requires_name,
        ))
    }

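    /// Builds [`TableData`] from a parsed `CREATE VIEW` statement. Only the
    /// view name is resolved (including `IDENTIFIER(...)` placeholders);
    /// columns are not derived from the view body.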
    fn parse_create_view(
        &self,
        view_index: usize,
        name: &ObjectName,
        preprocessing_state: &PreprocessingState,
    ) -> std::result::Result<(TableData, bool), String> {
        let mut view_name = Self::object_name_to_string(name);
        let mut requires_name = false;

        if view_name.starts_with("__databricks_table_")
            && let Some(original_expr) = preprocessing_state.identifier_replacements.get(&view_name)
        {
            if let Some(extracted_name) = Self::extract_identifier_table_name(original_expr) {
                view_name = extracted_name;
            } else {
                requires_name = true;
            }
        }

        if let Err(e) = validate_table_name(&view_name) {
            tracing::warn!("View name validation warning: {}", e);
        }

        Ok((
            TableData {
                table_index: view_index,
                name: Some(view_name),
                // View columns are not derived from the SELECT body.
                columns: Vec::new(),
            },
            requires_name,
        ))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sql_importer_default() {
        let importer = SQLImporter::default();
        assert_eq!(importer.dialect, "generic");
    }

    #[test]
    fn test_sql_importer_parse_basic() {
        let importer = SQLImporter::new("postgres");
        let result = importer
            .parse("CREATE TABLE test (id INT PRIMARY KEY, name TEXT NOT NULL);")
            .unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        let t = &result.tables[0];
        assert_eq!(t.name.as_deref(), Some("test"));
        assert_eq!(t.columns.len(), 2);
        assert!(t.columns.iter().any(|c| c.name == "id" && c.primary_key));
        assert!(t.columns.iter().any(|c| c.name == "name" && !c.nullable));
    }

    #[test]
    fn test_sql_importer_parse_table_pk_constraint() {
        let importer = SQLImporter::new("postgres");
        let result = importer
            .parse("CREATE TABLE t (id INT, name TEXT, CONSTRAINT pk PRIMARY KEY (id));")
            .unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        let t = &result.tables[0];
        assert!(t.columns.iter().any(|c| c.name == "id" && c.primary_key));
    }

    #[test]
    fn test_sql_importer_parse_liquibase_formatted_sql() {
        let importer = SQLImporter::new("postgres");
        let result = importer
            .parse_liquibase(
                "--liquibase formatted sql\n--changeset user:1\nCREATE TABLE test (id INT);\n",
            )
            .unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
    }

    #[test]
    fn test_databricks_identifier_with_literal() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE IDENTIFIER('test_table') (id STRING);";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].name.as_deref(), Some("test_table"));
    }

    #[test]
    fn test_databricks_identifier_with_variable() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE IDENTIFIER(:table_name) (id STRING);";
        let result = importer.parse(sql).unwrap();
        assert_eq!(result.tables.len(), 1);
        assert!(
            result.tables[0]
                .name
                .as_deref()
                .unwrap()
                .starts_with("__databricks_table_")
        );
        assert_eq!(result.tables_requiring_name.len(), 1);
    }

    #[test]
    fn test_databricks_identifier_with_concatenation() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE IDENTIFIER(:catalog || '.schema.table') (id STRING);";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].name.as_deref(), Some("schema.table"));
    }

    #[test]
    fn test_databricks_variable_in_struct() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE example (metadata STRUCT<key: STRING, value: :variable_type, timestamp: TIMESTAMP>);";
        let result = importer.parse(sql).unwrap();
        if !result.errors.is_empty() {
            eprintln!("Parse errors: {:?}", result.errors);
        }
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert!(
            result.tables[0].columns[0]
                .data_type
                .contains("value: STRING")
        );
    }

    #[test]
    fn test_databricks_variable_in_array() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE example (items ARRAY<:element_type>);";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].columns[0].data_type, "ARRAY<STRING>");
    }

    #[test]
    fn test_databricks_nested_variables() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE example (events ARRAY<STRUCT<id: STRING, name: STRING, details: STRUCT<name: STRING, status: :variable_type, timestamp: TIMESTAMP>>>);";
        let result = importer.parse(sql).unwrap();
        if !result.errors.is_empty() {
            eprintln!("Parse errors: {:?}", result.errors);
        }
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert!(
            result.tables[0].columns[0]
                .data_type
                .contains("status: STRING")
        );
    }

    #[test]
    fn test_databricks_comment_variable() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE example (id STRING) COMMENT ':comment_variable';";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
    }

    #[test]
    fn test_databricks_tblproperties_variable() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE example (id STRING) TBLPROPERTIES ('key1' = ':variable_value', 'key2' = 'static_value');";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
    }

    #[test]
    fn test_databricks_column_variable() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE TABLE example (id :id_var STRING, name :name_var STRING);";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].columns.len(), 2);
    }

    #[test]
    fn test_databricks_create_view() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE VIEW example_view AS SELECT id, name FROM source_table;";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].name.as_deref(), Some("example_view"));
    }

    #[test]
    fn test_databricks_view_with_identifier() {
        let importer = SQLImporter::new("databricks");
        let sql =
            "CREATE VIEW IDENTIFIER(:catalog || '.schema.view_name') AS SELECT * FROM table1;";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].name.as_deref(), Some("schema.view_name"));
    }

    #[test]
    fn test_databricks_create_materialized_view() {
        let importer = SQLImporter::new("databricks");
        let sql = "CREATE MATERIALIZED VIEW mv_example AS SELECT id, name FROM source_table;";
        let result = importer.parse(sql).unwrap();
        assert!(result.errors.is_empty());
        assert_eq!(result.tables.len(), 1);
        assert_eq!(result.tables[0].name.as_deref(), Some("mv_example"));
    }
}