sql_splitter/schema/
ddl.rs1use super::{Column, ColumnId, ColumnType, ForeignKey, IndexDef, Schema, TableId, TableSchema};
9use once_cell::sync::Lazy;
10use regex::Regex;
11
12static CREATE_TABLE_NAME_RE: Lazy<Regex> = Lazy::new(|| {
15 Regex::new(r#"(?i)CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?(?:\w+\.)?[`"]?([^`"\s(]+)[`"]?"#)
16 .unwrap()
17});
18
19static ALTER_TABLE_NAME_RE: Lazy<Regex> = Lazy::new(|| {
22 Regex::new(r#"(?i)ALTER\s+TABLE\s+(?:ONLY\s+)?(?:\w+\.)?[`"]?([^`"\s]+)[`"]?"#).unwrap()
23});
24
25static COLUMN_DEF_RE: Lazy<Regex> = Lazy::new(|| {
28 Regex::new(r#"^\s*[`"]?([^`"\s,]+)[`"]?\s+(\w+(?:\([^)]+\))?(?:\s+unsigned)?)"#).unwrap()
29});
30
31static PRIMARY_KEY_RE: Lazy<Regex> =
33 Lazy::new(|| Regex::new(r"(?i)PRIMARY\s+KEY\s*\(([^)]+)\)").unwrap());
34
35static INLINE_PRIMARY_KEY_RE: Lazy<Regex> =
37 Lazy::new(|| Regex::new(r"(?i)\bPRIMARY\s+KEY\b").unwrap());
38
39static FOREIGN_KEY_RE: Lazy<Regex> = Lazy::new(|| {
42 Regex::new(
43 r#"(?i)(?:CONSTRAINT\s+[`"]?([^`"\s]+)[`"]?\s+)?FOREIGN\s+KEY\s*\(([^)]+)\)\s*REFERENCES\s+(?:\w+\.)?[`"]?([^`"\s(]+)[`"]?\s*\(([^)]+)\)"#,
44 )
45 .unwrap()
46});
47
48static NOT_NULL_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bNOT\s+NULL\b").unwrap());
50
51static INLINE_INDEX_RE: Lazy<Regex> = Lazy::new(|| {
54 Regex::new(r#"(?i)(?:(UNIQUE)\s+)?(?:INDEX|KEY)\s+[`"]?(\w+)[`"]?\s*\(([^)]+)\)"#).unwrap()
55});
56
57static CREATE_INDEX_RE: Lazy<Regex> = Lazy::new(|| {
60 Regex::new(
61 r#"(?i)CREATE\s+(UNIQUE\s+)?INDEX\s+(?:IF\s+NOT\s+EXISTS\s+)?[`"]?(\w+)[`"]?\s+ON\s+(?:\w+\.)?[`"]?(\w+)[`"]?\s*(?:USING\s+(\w+)\s*)?\(([^)]+)\)"#,
62 )
63 .unwrap()
64});
65
66#[derive(Debug, Default)]
68pub struct SchemaBuilder {
69 schema: Schema,
70}
71
72impl SchemaBuilder {
73 pub fn new() -> Self {
75 Self {
76 schema: Schema::new(),
77 }
78 }
79
80 pub fn parse_create_table(&mut self, stmt: &str) -> Option<TableId> {
82 let table_name = extract_create_table_name(stmt)?;
83
84 if self.schema.get_table_id(&table_name).is_some() {
86 return self.schema.get_table_id(&table_name);
87 }
88
89 let mut table = TableSchema::new(table_name, TableId(0));
90 table.create_statement = Some(stmt.to_string());
91
92 let body = extract_table_body(stmt)?;
94
95 parse_table_body(&body, &mut table);
97
98 Some(self.schema.add_table(table))
100 }
101
102 pub fn parse_alter_table(&mut self, stmt: &str) -> Option<TableId> {
104 let table_name = extract_alter_table_name(stmt)?;
105 let table_id = self.schema.get_table_id(&table_name)?;
106
107 for fk in parse_foreign_keys(stmt) {
109 if let Some(table) = self.schema.table_mut(table_id) {
110 let mut resolved_fk = fk;
112 resolved_fk.columns = resolved_fk
113 .column_names
114 .iter()
115 .filter_map(|name| table.get_column_id(name))
116 .collect();
117 table.foreign_keys.push(resolved_fk);
118 }
119 }
120
121 Some(table_id)
122 }
123
124 pub fn parse_create_index(&mut self, stmt: &str) -> Option<TableId> {
126 let caps = CREATE_INDEX_RE.captures(stmt)?;
127
128 let is_unique = caps.get(1).is_some();
129 let index_name = caps.get(2)?.as_str().to_string();
130 let table_name = caps.get(3)?.as_str().to_string();
131 let index_type = caps.get(4).map(|m| m.as_str().to_uppercase());
132 let columns_str = caps.get(5)?.as_str();
133 let columns = parse_column_list(columns_str);
134
135 let table_id = self.schema.get_table_id(&table_name)?;
136
137 if let Some(table) = self.schema.table_mut(table_id) {
138 table.indexes.push(IndexDef {
139 name: index_name,
140 columns,
141 is_unique,
142 index_type,
143 });
144 }
145
146 Some(table_id)
147 }
148
149 pub fn build(mut self) -> Schema {
151 self.schema.resolve_foreign_keys();
152 self.schema
153 }
154
155 pub fn schema(&self) -> &Schema {
157 &self.schema
158 }
159}
160
161pub fn extract_create_table_name(stmt: &str) -> Option<String> {
163 CREATE_TABLE_NAME_RE
164 .captures(stmt)
165 .and_then(|c| c.get(1))
166 .map(|m| m.as_str().to_string())
167}
168
169pub fn extract_alter_table_name(stmt: &str) -> Option<String> {
171 ALTER_TABLE_NAME_RE
172 .captures(stmt)
173 .and_then(|c| c.get(1))
174 .map(|m| m.as_str().to_string())
175}
176
177fn extract_table_body(stmt: &str) -> Option<String> {
179 let bytes = stmt.as_bytes();
180 let mut depth = 0;
181 let mut start = None;
182 let mut in_string = false;
183 let mut escape_next = false;
184
185 for (i, &b) in bytes.iter().enumerate() {
186 if escape_next {
187 escape_next = false;
188 continue;
189 }
190
191 if b == b'\\' && in_string {
192 escape_next = true;
193 continue;
194 }
195
196 if b == b'\'' {
197 in_string = !in_string;
198 continue;
199 }
200
201 if in_string {
202 continue;
203 }
204
205 if b == b'(' {
206 if depth == 0 {
207 start = Some(i + 1);
208 }
209 depth += 1;
210 } else if b == b')' {
211 depth -= 1;
212 if depth == 0 {
213 if let Some(s) = start {
214 return Some(stmt[s..i].to_string());
215 }
216 }
217 }
218 }
219
220 None
221}
222
223fn parse_table_body(body: &str, table: &mut TableSchema) {
225 let parts = split_table_body(body);
227
228 for part in parts {
229 let trimmed = part.trim();
230 if trimmed.is_empty() {
231 continue;
232 }
233
234 let upper = trimmed.to_uppercase();
236 if upper.starts_with("PRIMARY KEY")
237 || upper.starts_with("CONSTRAINT")
238 || upper.starts_with("FOREIGN KEY")
239 || upper.starts_with("KEY ")
240 || upper.starts_with("INDEX ")
241 || upper.starts_with("UNIQUE ")
242 || upper.starts_with("FULLTEXT ")
243 || upper.starts_with("SPATIAL ")
244 || upper.starts_with("CHECK ")
245 {
246 if let Some(pk_cols) = parse_primary_key_constraint(trimmed) {
248 for col_name in pk_cols {
249 if let Some(col) = table
250 .columns
251 .iter_mut()
252 .find(|c| c.name.eq_ignore_ascii_case(&col_name))
253 {
254 col.is_primary_key = true;
255 if !table.primary_key.contains(&col.ordinal) {
256 table.primary_key.push(col.ordinal);
257 }
258 }
259 }
260 }
261
262 for fk in parse_foreign_keys(trimmed) {
263 let mut resolved_fk = fk;
264 resolved_fk.columns = resolved_fk
265 .column_names
266 .iter()
267 .filter_map(|name| table.get_column_id(name))
268 .collect();
269 table.foreign_keys.push(resolved_fk);
270 }
271
272 if let Some(idx) = parse_inline_index(trimmed) {
274 table.indexes.push(idx);
275 }
276 } else {
277 if let Some(col) = parse_column_def(trimmed, ColumnId(table.columns.len() as u16)) {
279 if INLINE_PRIMARY_KEY_RE.is_match(trimmed) {
281 let mut col = col;
282 col.is_primary_key = true;
283 table.primary_key.push(col.ordinal);
284 table.columns.push(col);
285 } else {
286 table.columns.push(col);
287 }
288 }
289 }
290 }
291}
292
293pub fn split_table_body(body: &str) -> Vec<String> {
295 let mut parts = Vec::new();
296 let mut current = String::new();
297 let mut depth = 0;
298 let mut in_string = false;
299 let mut escape_next = false;
300
301 for ch in body.chars() {
302 if escape_next {
303 current.push(ch);
304 escape_next = false;
305 continue;
306 }
307
308 if ch == '\\' && in_string {
309 current.push(ch);
310 escape_next = true;
311 continue;
312 }
313
314 if ch == '\'' {
315 in_string = !in_string;
316 current.push(ch);
317 continue;
318 }
319
320 if in_string {
321 current.push(ch);
322 continue;
323 }
324
325 match ch {
326 '(' => {
327 depth += 1;
328 current.push(ch);
329 }
330 ')' => {
331 depth -= 1;
332 current.push(ch);
333 }
334 ',' if depth == 0 => {
335 parts.push(current.trim().to_string());
336 current = String::new();
337 }
338 _ => {
339 current.push(ch);
340 }
341 }
342 }
343
344 if !current.trim().is_empty() {
345 parts.push(current.trim().to_string());
346 }
347
348 parts
349}
350
351fn parse_column_def(def: &str, ordinal: ColumnId) -> Option<Column> {
353 let caps = COLUMN_DEF_RE.captures(def)?;
354 let name = caps.get(1)?.as_str().to_string();
355 let type_str = caps.get(2)?.as_str();
356
357 let col_type = ColumnType::from_mysql_type(type_str);
358 let is_nullable = !NOT_NULL_RE.is_match(def);
359
360 Some(Column {
361 name,
362 col_type,
363 ordinal,
364 is_primary_key: false,
365 is_nullable,
366 })
367}
368
369fn parse_primary_key_constraint(constraint: &str) -> Option<Vec<String>> {
371 let caps = PRIMARY_KEY_RE.captures(constraint)?;
372 let cols_str = caps.get(1)?.as_str();
373 Some(parse_column_list(cols_str))
374}
375
376fn parse_inline_index(constraint: &str) -> Option<IndexDef> {
378 let caps = INLINE_INDEX_RE.captures(constraint)?;
379
380 let is_unique = caps.get(1).is_some();
381 let index_name = caps.get(2)?.as_str().to_string();
382 let columns_str = caps.get(3)?.as_str();
383 let columns = parse_column_list(columns_str);
384
385 Some(IndexDef {
386 name: index_name,
387 columns,
388 is_unique,
389 index_type: None, })
391}
392
393fn parse_foreign_keys(stmt: &str) -> Vec<ForeignKey> {
395 let mut fks = Vec::new();
396
397 for caps in FOREIGN_KEY_RE.captures_iter(stmt) {
398 let name = caps.get(1).map(|m| m.as_str().to_string());
399 let local_cols = caps
400 .get(2)
401 .map(|m| parse_column_list(m.as_str()))
402 .unwrap_or_default();
403 let ref_table = caps
404 .get(3)
405 .map(|m| m.as_str().to_string())
406 .unwrap_or_default();
407 let ref_cols = caps
408 .get(4)
409 .map(|m| parse_column_list(m.as_str()))
410 .unwrap_or_default();
411
412 if !local_cols.is_empty() && !ref_table.is_empty() && !ref_cols.is_empty() {
413 fks.push(ForeignKey {
414 name,
415 columns: Vec::new(), column_names: local_cols,
417 referenced_table: ref_table,
418 referenced_columns: ref_cols,
419 referenced_table_id: None,
420 });
421 }
422 }
423
424 fks
425}
426
427pub fn parse_column_list(s: &str) -> Vec<String> {
429 s.split(',')
430 .map(|c| c.trim().trim_matches('`').trim_matches('"').to_string())
431 .filter(|c| !c.is_empty())
432 .collect()
433}