sql_splitter/schema/
ddl.rs

1//! MySQL DDL parsing for schema extraction.
2//!
3//! Parses CREATE TABLE and ALTER TABLE statements to extract:
4//! - Column definitions with types
5//! - Primary key constraints
6//! - Foreign key constraints
7
8use super::{Column, ColumnId, ColumnType, ForeignKey, IndexDef, Schema, TableId, TableSchema};
9use once_cell::sync::Lazy;
10use regex::Regex;
11
12/// Regex to extract table name from CREATE TABLE
13/// Supports: `table` (MySQL), "table" (PostgreSQL), table (SQLite/unquoted), schema.table
14static CREATE_TABLE_NAME_RE: Lazy<Regex> = Lazy::new(|| {
15    Regex::new(r#"(?i)CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?(?:\w+\.)?[`"]?([^`"\s(]+)[`"]?"#)
16        .unwrap()
17});
18
19/// Regex to extract table name from ALTER TABLE
20/// Supports: `table` (MySQL), "table" (PostgreSQL), table (SQLite/unquoted), schema.table
21static ALTER_TABLE_NAME_RE: Lazy<Regex> = Lazy::new(|| {
22    Regex::new(r#"(?i)ALTER\s+TABLE\s+(?:ONLY\s+)?(?:\w+\.)?[`"]?([^`"\s]+)[`"]?"#).unwrap()
23});
24
25/// Regex for column definition
26/// Supports: `column` (MySQL), "column" (PostgreSQL), column (unquoted)
27static COLUMN_DEF_RE: Lazy<Regex> = Lazy::new(|| {
28    Regex::new(r#"^\s*[`"]?([^`"\s,]+)[`"]?\s+(\w+(?:\([^)]+\))?(?:\s+unsigned)?)"#).unwrap()
29});
30
31/// Regex for PRIMARY KEY constraint
32static PRIMARY_KEY_RE: Lazy<Regex> =
33    Lazy::new(|| Regex::new(r"(?i)PRIMARY\s+KEY\s*\(([^)]+)\)").unwrap());
34
35/// Regex for inline PRIMARY KEY on column
36static INLINE_PRIMARY_KEY_RE: Lazy<Regex> =
37    Lazy::new(|| Regex::new(r"(?i)\bPRIMARY\s+KEY\b").unwrap());
38
39/// Regex for FOREIGN KEY constraint with optional constraint name
40/// Supports: `name` (MySQL), "name" (PostgreSQL), name (unquoted)
41static FOREIGN_KEY_RE: Lazy<Regex> = Lazy::new(|| {
42    Regex::new(
43        r#"(?i)(?:CONSTRAINT\s+[`"]?([^`"\s]+)[`"]?\s+)?FOREIGN\s+KEY\s*\(([^)]+)\)\s*REFERENCES\s+(?:\w+\.)?[`"]?([^`"\s(]+)[`"]?\s*\(([^)]+)\)"#,
44    )
45    .unwrap()
46});
47
48/// Regex to detect NOT NULL constraint
49static NOT_NULL_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bNOT\s+NULL\b").unwrap());
50
51/// Regex for inline INDEX/KEY in CREATE TABLE
52/// Matches: INDEX idx_name (col1, col2), KEY idx_name (col1), UNIQUE INDEX idx_name (col1)
53static INLINE_INDEX_RE: Lazy<Regex> = Lazy::new(|| {
54    Regex::new(r#"(?i)(?:(UNIQUE)\s+)?(?:INDEX|KEY)\s+[`"]?(\w+)[`"]?\s*\(([^)]+)\)"#).unwrap()
55});
56
57/// Regex for CREATE INDEX statement
58/// Matches: CREATE [UNIQUE] INDEX [IF NOT EXISTS] idx_name ON table [USING method] (columns)
59static CREATE_INDEX_RE: Lazy<Regex> = Lazy::new(|| {
60    Regex::new(
61        r#"(?i)CREATE\s+(UNIQUE\s+)?INDEX\s+(?:IF\s+NOT\s+EXISTS\s+)?[`"]?(\w+)[`"]?\s+ON\s+(?:\w+\.)?[`"]?(\w+)[`"]?\s*(?:USING\s+(\w+)\s*)?\(([^)]+)\)"#,
62    )
63    .unwrap()
64});
65
66/// Builder for constructing schema from DDL statements
67#[derive(Debug, Default)]
68pub struct SchemaBuilder {
69    schema: Schema,
70}
71
72impl SchemaBuilder {
73    /// Create a new schema builder
74    pub fn new() -> Self {
75        Self {
76            schema: Schema::new(),
77        }
78    }
79
80    /// Parse a CREATE TABLE statement and add to schema
81    pub fn parse_create_table(&mut self, stmt: &str) -> Option<TableId> {
82        let table_name = extract_create_table_name(stmt)?;
83
84        // Check if table already exists
85        if self.schema.get_table_id(&table_name).is_some() {
86            return self.schema.get_table_id(&table_name);
87        }
88
89        let mut table = TableSchema::new(table_name, TableId(0));
90        table.create_statement = Some(stmt.to_string());
91
92        // Extract the body between first ( and last )
93        let body = extract_table_body(stmt)?;
94
95        // Parse columns and constraints
96        parse_table_body(&body, &mut table);
97
98        // Add table to schema
99        Some(self.schema.add_table(table))
100    }
101
102    /// Parse an ALTER TABLE statement and update existing table
103    pub fn parse_alter_table(&mut self, stmt: &str) -> Option<TableId> {
104        let table_name = extract_alter_table_name(stmt)?;
105        let table_id = self.schema.get_table_id(&table_name)?;
106
107        // Parse any FK constraints added by ALTER TABLE
108        for fk in parse_foreign_keys(stmt) {
109            if let Some(table) = self.schema.table_mut(table_id) {
110                // Resolve column names to IDs
111                let mut resolved_fk = fk;
112                resolved_fk.columns = resolved_fk
113                    .column_names
114                    .iter()
115                    .filter_map(|name| table.get_column_id(name))
116                    .collect();
117                table.foreign_keys.push(resolved_fk);
118            }
119        }
120
121        Some(table_id)
122    }
123
124    /// Parse a CREATE INDEX statement and add to the appropriate table
125    pub fn parse_create_index(&mut self, stmt: &str) -> Option<TableId> {
126        let caps = CREATE_INDEX_RE.captures(stmt)?;
127
128        let is_unique = caps.get(1).is_some();
129        let index_name = caps.get(2)?.as_str().to_string();
130        let table_name = caps.get(3)?.as_str().to_string();
131        let index_type = caps.get(4).map(|m| m.as_str().to_uppercase());
132        let columns_str = caps.get(5)?.as_str();
133        let columns = parse_column_list(columns_str);
134
135        let table_id = self.schema.get_table_id(&table_name)?;
136
137        if let Some(table) = self.schema.table_mut(table_id) {
138            table.indexes.push(IndexDef {
139                name: index_name,
140                columns,
141                is_unique,
142                index_type,
143            });
144        }
145
146        Some(table_id)
147    }
148
149    /// Finalize the schema, resolving all FK references
150    pub fn build(mut self) -> Schema {
151        self.schema.resolve_foreign_keys();
152        self.schema
153    }
154
155    /// Get current schema (for inspection during building)
156    pub fn schema(&self) -> &Schema {
157        &self.schema
158    }
159}
160
161/// Extract table name from CREATE TABLE statement
162pub fn extract_create_table_name(stmt: &str) -> Option<String> {
163    CREATE_TABLE_NAME_RE
164        .captures(stmt)
165        .and_then(|c| c.get(1))
166        .map(|m| m.as_str().to_string())
167}
168
169/// Extract table name from ALTER TABLE statement
170pub fn extract_alter_table_name(stmt: &str) -> Option<String> {
171    ALTER_TABLE_NAME_RE
172        .captures(stmt)
173        .and_then(|c| c.get(1))
174        .map(|m| m.as_str().to_string())
175}
176
177/// Extract the body of a CREATE TABLE statement (between first ( and matching ))
178fn extract_table_body(stmt: &str) -> Option<String> {
179    let bytes = stmt.as_bytes();
180    let mut depth = 0;
181    let mut start = None;
182    let mut in_string = false;
183    let mut escape_next = false;
184
185    for (i, &b) in bytes.iter().enumerate() {
186        if escape_next {
187            escape_next = false;
188            continue;
189        }
190
191        if b == b'\\' && in_string {
192            escape_next = true;
193            continue;
194        }
195
196        if b == b'\'' {
197            in_string = !in_string;
198            continue;
199        }
200
201        if in_string {
202            continue;
203        }
204
205        if b == b'(' {
206            if depth == 0 {
207                start = Some(i + 1);
208            }
209            depth += 1;
210        } else if b == b')' {
211            depth -= 1;
212            if depth == 0 {
213                if let Some(s) = start {
214                    return Some(stmt[s..i].to_string());
215                }
216            }
217        }
218    }
219
220    None
221}
222
223/// Parse the body of a CREATE TABLE to extract columns and constraints
224fn parse_table_body(body: &str, table: &mut TableSchema) {
225    // Split by commas, but respect nested parentheses
226    let parts = split_table_body(body);
227
228    for part in parts {
229        let trimmed = part.trim();
230        if trimmed.is_empty() {
231            continue;
232        }
233
234        // Check if this is a constraint or a column
235        let upper = trimmed.to_uppercase();
236        if upper.starts_with("PRIMARY KEY")
237            || upper.starts_with("CONSTRAINT")
238            || upper.starts_with("FOREIGN KEY")
239            || upper.starts_with("KEY ")
240            || upper.starts_with("INDEX ")
241            || upper.starts_with("UNIQUE ")
242            || upper.starts_with("FULLTEXT ")
243            || upper.starts_with("SPATIAL ")
244            || upper.starts_with("CHECK ")
245        {
246            // Parse constraints
247            if let Some(pk_cols) = parse_primary_key_constraint(trimmed) {
248                for col_name in pk_cols {
249                    if let Some(col) = table
250                        .columns
251                        .iter_mut()
252                        .find(|c| c.name.eq_ignore_ascii_case(&col_name))
253                    {
254                        col.is_primary_key = true;
255                        if !table.primary_key.contains(&col.ordinal) {
256                            table.primary_key.push(col.ordinal);
257                        }
258                    }
259                }
260            }
261
262            for fk in parse_foreign_keys(trimmed) {
263                let mut resolved_fk = fk;
264                resolved_fk.columns = resolved_fk
265                    .column_names
266                    .iter()
267                    .filter_map(|name| table.get_column_id(name))
268                    .collect();
269                table.foreign_keys.push(resolved_fk);
270            }
271
272            // Parse inline indexes (INDEX, KEY, UNIQUE INDEX, UNIQUE KEY)
273            if let Some(idx) = parse_inline_index(trimmed) {
274                table.indexes.push(idx);
275            }
276        } else {
277            // Parse column definition
278            if let Some(col) = parse_column_def(trimmed, ColumnId(table.columns.len() as u16)) {
279                // Check for inline PRIMARY KEY
280                if INLINE_PRIMARY_KEY_RE.is_match(trimmed) {
281                    let mut col = col;
282                    col.is_primary_key = true;
283                    table.primary_key.push(col.ordinal);
284                    table.columns.push(col);
285                } else {
286                    table.columns.push(col);
287                }
288            }
289        }
290    }
291}
292
293/// Split table body by commas, respecting nested parentheses
294pub fn split_table_body(body: &str) -> Vec<String> {
295    let mut parts = Vec::new();
296    let mut current = String::new();
297    let mut depth = 0;
298    let mut in_string = false;
299    let mut escape_next = false;
300
301    for ch in body.chars() {
302        if escape_next {
303            current.push(ch);
304            escape_next = false;
305            continue;
306        }
307
308        if ch == '\\' && in_string {
309            current.push(ch);
310            escape_next = true;
311            continue;
312        }
313
314        if ch == '\'' {
315            in_string = !in_string;
316            current.push(ch);
317            continue;
318        }
319
320        if in_string {
321            current.push(ch);
322            continue;
323        }
324
325        match ch {
326            '(' => {
327                depth += 1;
328                current.push(ch);
329            }
330            ')' => {
331                depth -= 1;
332                current.push(ch);
333            }
334            ',' if depth == 0 => {
335                parts.push(current.trim().to_string());
336                current = String::new();
337            }
338            _ => {
339                current.push(ch);
340            }
341        }
342    }
343
344    if !current.trim().is_empty() {
345        parts.push(current.trim().to_string());
346    }
347
348    parts
349}
350
351/// Parse a column definition
352fn parse_column_def(def: &str, ordinal: ColumnId) -> Option<Column> {
353    let caps = COLUMN_DEF_RE.captures(def)?;
354    let name = caps.get(1)?.as_str().to_string();
355    let type_str = caps.get(2)?.as_str();
356
357    let col_type = ColumnType::from_mysql_type(type_str);
358    let is_nullable = !NOT_NULL_RE.is_match(def);
359
360    Some(Column {
361        name,
362        col_type,
363        ordinal,
364        is_primary_key: false,
365        is_nullable,
366    })
367}
368
369/// Parse PRIMARY KEY constraint, returns column names
370fn parse_primary_key_constraint(constraint: &str) -> Option<Vec<String>> {
371    let caps = PRIMARY_KEY_RE.captures(constraint)?;
372    let cols_str = caps.get(1)?.as_str();
373    Some(parse_column_list(cols_str))
374}
375
376/// Parse inline INDEX/KEY constraint from CREATE TABLE body
377fn parse_inline_index(constraint: &str) -> Option<IndexDef> {
378    let caps = INLINE_INDEX_RE.captures(constraint)?;
379
380    let is_unique = caps.get(1).is_some();
381    let index_name = caps.get(2)?.as_str().to_string();
382    let columns_str = caps.get(3)?.as_str();
383    let columns = parse_column_list(columns_str);
384
385    Some(IndexDef {
386        name: index_name,
387        columns,
388        is_unique,
389        index_type: None, // Inline indexes don't specify type
390    })
391}
392
393/// Parse FOREIGN KEY constraints from a statement
394fn parse_foreign_keys(stmt: &str) -> Vec<ForeignKey> {
395    let mut fks = Vec::new();
396
397    for caps in FOREIGN_KEY_RE.captures_iter(stmt) {
398        let name = caps.get(1).map(|m| m.as_str().to_string());
399        let local_cols = caps
400            .get(2)
401            .map(|m| parse_column_list(m.as_str()))
402            .unwrap_or_default();
403        let ref_table = caps
404            .get(3)
405            .map(|m| m.as_str().to_string())
406            .unwrap_or_default();
407        let ref_cols = caps
408            .get(4)
409            .map(|m| parse_column_list(m.as_str()))
410            .unwrap_or_default();
411
412        if !local_cols.is_empty() && !ref_table.is_empty() && !ref_cols.is_empty() {
413            fks.push(ForeignKey {
414                name,
415                columns: Vec::new(), // Will be resolved later
416                column_names: local_cols,
417                referenced_table: ref_table,
418                referenced_columns: ref_cols,
419                referenced_table_id: None,
420            });
421        }
422    }
423
424    fks
425}
426
427/// Parse a comma-separated column list, stripping backticks and whitespace
428pub fn parse_column_list(s: &str) -> Vec<String> {
429    s.split(',')
430        .map(|c| c.trim().trim_matches('`').trim_matches('"').to_string())
431        .filter(|c| !c.is_empty())
432        .collect()
433}