data_modelling_sdk/import/
sql.rs

1//! SQL Import functionality
2//!
3//! Provides parsing of CREATE TABLE statements from various SQL dialects.
4//!
5//! Uses `sqlparser` to parse CREATE TABLE statements into SDK import primitives.
6//!
7//! # Validation
8//!
9//! All imported table and column names are validated for:
10//! - Valid identifier format
11//! - Maximum length limits
12//! - SQL reserved word detection
13
14use super::{ColumnData, ImportError, ImportResult, TableData};
15use crate::validation::input::{validate_column_name, validate_data_type, validate_table_name};
16use anyhow::Result;
17use sqlparser::ast::{ColumnDef, ColumnOption, ObjectName, Statement, TableConstraint};
18use sqlparser::dialect::{Dialect, GenericDialect, MySqlDialect, PostgreSqlDialect, SQLiteDialect};
19use sqlparser::parser::Parser;
20
21/// SQL Importer - parses CREATE TABLE statements
22pub struct SQLImporter {
23    /// SQL dialect to use for parsing
24    pub dialect: String,
25}
26
27impl Default for SQLImporter {
28    fn default() -> Self {
29        Self {
30            dialect: "generic".to_string(),
31        }
32    }
33}
34
35impl SQLImporter {
36    /// Create a new SQL importer with the specified dialect
37    ///
38    /// # Arguments
39    ///
40    /// * `dialect` - SQL dialect name ("postgres", "mysql", "sqlite", "generic")
41    ///
42    /// # Example
43    ///
44    /// ```rust
45    /// use data_modelling_sdk::import::sql::SQLImporter;
46    ///
47    /// let importer = SQLImporter::new("postgres");
48    /// ```
49    pub fn new(dialect: &str) -> Self {
50        Self {
51            dialect: dialect.to_string(),
52        }
53    }
54
55    /// Parse SQL and extract table definitions
56    ///
57    /// # Arguments
58    ///
59    /// * `sql` - SQL string containing CREATE TABLE statements
60    ///
61    /// # Returns
62    ///
63    /// An `ImportResult` containing extracted tables and any parse errors.
64    ///
65    /// # Example
66    ///
67    /// ```rust
68    /// use data_modelling_sdk::import::sql::SQLImporter;
69    ///
70    /// let importer = SQLImporter::new("postgres");
71    /// let sql = "CREATE TABLE users (id INT PRIMARY KEY, name VARCHAR(100));";
72    /// let result = importer.parse(sql).unwrap();
73    /// assert_eq!(result.tables.len(), 1);
74    /// ```
75    pub fn parse(&self, sql: &str) -> Result<ImportResult> {
76        let dialect = self.dialect_impl();
77        let statements = match Parser::parse_sql(dialect.as_ref(), sql) {
78            Ok(stmts) => stmts,
79            Err(e) => {
80                return Ok(ImportResult {
81                    tables: Vec::new(),
82                    tables_requiring_name: Vec::new(),
83                    errors: vec![ImportError::ParseError(e.to_string())],
84                    ai_suggestions: None,
85                });
86            }
87        };
88
89        let mut tables = Vec::new();
90        let mut errors = Vec::new();
91
92        for (idx, stmt) in statements.into_iter().enumerate() {
93            if let Statement::CreateTable(create) = stmt {
94                match self.parse_create_table(
95                    idx,
96                    &create.name,
97                    &create.columns,
98                    &create.constraints,
99                ) {
100                    Ok(t) => tables.push(t),
101                    Err(e) => errors.push(ImportError::ParseError(e)),
102                }
103            }
104            // Only CREATE TABLE statements are relevant for data modeling.
105            // Other statements (INSERT, UPDATE, DELETE, etc.) are ignored.
106        }
107
108        Ok(ImportResult {
109            tables,
110            tables_requiring_name: Vec::new(),
111            errors,
112            ai_suggestions: None,
113        })
114    }
115
116    /// Parse SQL with Liquibase format support
117    ///
118    /// Strips Liquibase directive comments (--liquibase formatted sql, --changeset, etc.)
119    /// before parsing the SQL.
120    ///
121    /// # Arguments
122    ///
123    /// * `sql` - SQL string with optional Liquibase comments
124    ///
125    /// # Returns
126    ///
127    /// An `ImportResult` containing extracted tables.
128    ///
129    /// # Example
130    ///
131    /// ```rust
132    /// use data_modelling_sdk::import::sql::SQLImporter;
133    ///
134    /// let importer = SQLImporter::new("postgres");
135    /// let sql = r#"
136    /// --liquibase formatted sql
137    /// --changeset user:1
138    /// CREATE TABLE users (id INT);
139    /// "#;
140    /// let result = importer.parse_liquibase(sql).unwrap();
141    /// ```
142    pub fn parse_liquibase(&self, sql: &str) -> Result<ImportResult> {
143        // Liquibase "formatted SQL" is still SQL, but often includes directive comments like:
144        // --liquibase formatted sql
145        // --changeset user:id
146        // We strip those comment lines, then parse the remaining SQL.
147        let cleaned = sql
148            .lines()
149            .filter(|l| {
150                let t = l.trim_start();
151                if !t.starts_with("--") {
152                    return true;
153                }
154                // Keep regular SQL comments? For now, drop all -- lines to avoid parser issues.
155                false
156            })
157            .collect::<Vec<_>>()
158            .join("\n");
159
160        self.parse(&cleaned)
161    }
162
163    fn dialect_impl(&self) -> Box<dyn Dialect + Send + Sync> {
164        match self.dialect.to_lowercase().as_str() {
165            "postgres" | "postgresql" => Box::new(PostgreSqlDialect {}),
166            "mysql" => Box::new(MySqlDialect {}),
167            "sqlite" => Box::new(SQLiteDialect {}),
168            _ => Box::new(GenericDialect {}),
169        }
170    }
171
172    fn object_name_to_string(name: &ObjectName) -> String {
173        // Use final identifier (supports schema-qualified names).
174        name.0
175            .last()
176            .map(|ident| ident.value.clone())
177            .unwrap_or_else(|| name.to_string())
178    }
179
180    fn parse_create_table(
181        &self,
182        table_index: usize,
183        name: &ObjectName,
184        columns: &[ColumnDef],
185        constraints: &[TableConstraint],
186    ) -> std::result::Result<TableData, String> {
187        let table_name = Self::object_name_to_string(name);
188
189        // Validate table name (warnings are logged but don't fail import)
190        if let Err(e) = validate_table_name(&table_name) {
191            // Log warning but continue - imported SQL may have valid but unusual names
192            tracing::warn!("Table name validation warning: {}", e);
193        }
194
195        // Collect PK columns from table-level constraints.
196        let mut pk_cols = std::collections::HashSet::<String>::new();
197        for c in constraints {
198            if let TableConstraint::PrimaryKey { columns, .. } = c {
199                for col in columns {
200                    pk_cols.insert(col.value.clone());
201                }
202            }
203        }
204
205        let mut out_cols = Vec::new();
206        for col in columns {
207            let mut nullable = true;
208            let mut is_pk = false;
209
210            for opt_def in &col.options {
211                match &opt_def.option {
212                    ColumnOption::NotNull => nullable = false,
213                    ColumnOption::Null => nullable = true,
214                    ColumnOption::Unique { is_primary, .. } => {
215                        if *is_primary {
216                            is_pk = true;
217                        }
218                    }
219                    _ => {}
220                }
221            }
222
223            if pk_cols.contains(&col.name.value) {
224                is_pk = true;
225            }
226
227            let col_name = col.name.value.clone();
228            let data_type = col.data_type.to_string();
229
230            // Validate column name and data type (warnings are logged but don't fail import)
231            if let Err(e) = validate_column_name(&col_name) {
232                tracing::warn!("Column name validation warning for '{}': {}", col_name, e);
233            }
234            if let Err(e) = validate_data_type(&data_type) {
235                tracing::warn!("Data type validation warning for '{}': {}", data_type, e);
236            }
237
238            out_cols.push(ColumnData {
239                name: col_name,
240                data_type,
241                nullable,
242                primary_key: is_pk,
243            });
244        }
245
246        Ok(TableData {
247            table_index,
248            name: Some(table_name),
249            columns: out_cols,
250        })
251    }
252}
253
254#[cfg(test)]
255mod tests {
256    use super::*;
257
258    #[test]
259    fn test_sql_importer_default() {
260        let importer = SQLImporter::default();
261        assert_eq!(importer.dialect, "generic");
262    }
263
264    #[test]
265    fn test_sql_importer_parse_basic() {
266        let importer = SQLImporter::new("postgres");
267        let result = importer
268            .parse("CREATE TABLE test (id INT PRIMARY KEY, name TEXT NOT NULL);")
269            .unwrap();
270        assert!(result.errors.is_empty());
271        assert_eq!(result.tables.len(), 1);
272        let t = &result.tables[0];
273        assert_eq!(t.name.as_deref(), Some("test"));
274        assert_eq!(t.columns.len(), 2);
275        assert!(t.columns.iter().any(|c| c.name == "id" && c.primary_key));
276        assert!(t.columns.iter().any(|c| c.name == "name" && !c.nullable));
277    }
278
279    #[test]
280    fn test_sql_importer_parse_table_pk_constraint() {
281        let importer = SQLImporter::new("postgres");
282        let result = importer
283            .parse("CREATE TABLE t (id INT, name TEXT, CONSTRAINT pk PRIMARY KEY (id));")
284            .unwrap();
285        assert!(result.errors.is_empty());
286        assert_eq!(result.tables.len(), 1);
287        let t = &result.tables[0];
288        assert!(t.columns.iter().any(|c| c.name == "id" && c.primary_key));
289    }
290
291    #[test]
292    fn test_sql_importer_parse_liquibase_formatted_sql() {
293        let importer = SQLImporter::new("postgres");
294        let result = importer
295            .parse_liquibase(
296                "--liquibase formatted sql\n--changeset user:1\nCREATE TABLE test (id INT);\n",
297            )
298            .unwrap();
299        assert!(result.errors.is_empty());
300        assert_eq!(result.tables.len(), 1);
301    }
302}