sql_splitter/convert/
types.rs

1//! Data type mapping between SQL dialects.
2//!
3//! Handles conversion of data types including:
4//! - Integer types (TINYINT, SMALLINT, INT, BIGINT)
5//! - Float types (FLOAT, DOUBLE, DECIMAL)
6//! - String types (CHAR, VARCHAR, TEXT)
7//! - Binary types (BLOB, BYTEA)
8//! - Date/time types (DATE, DATETIME, TIMESTAMP)
9//! - Special types (ENUM, SET, JSON)
10
11use crate::parser::SqlDialect;
12use once_cell::sync::Lazy;
13use regex::Regex;
14
15/// Type mapper for converting between dialects
16pub struct TypeMapper;
17
18impl TypeMapper {
19    /// Convert all data types in a statement
20    pub fn convert(stmt: &str, from: SqlDialect, to: SqlDialect) -> String {
21        match (from, to) {
22            (SqlDialect::MySql, SqlDialect::Postgres) => Self::mysql_to_postgres(stmt),
23            (SqlDialect::MySql, SqlDialect::Sqlite) => Self::mysql_to_sqlite(stmt),
24            (SqlDialect::Postgres, SqlDialect::MySql) => Self::postgres_to_mysql(stmt),
25            (SqlDialect::Postgres, SqlDialect::Sqlite) => Self::postgres_to_sqlite(stmt),
26            (SqlDialect::Sqlite, SqlDialect::MySql) => Self::sqlite_to_mysql(stmt),
27            (SqlDialect::Sqlite, SqlDialect::Postgres) => Self::sqlite_to_postgres(stmt),
28            _ => stmt.to_string(),
29        }
30    }
31
32    /// Convert MySQL types to PostgreSQL
33    fn mysql_to_postgres(stmt: &str) -> String {
34        let mut result = stmt.to_string();
35
36        // Integer types - strip display width, PostgreSQL doesn't use it
37        result = RE_TINYINT_BOOL.replace_all(&result, "BOOLEAN").to_string();
38        result = RE_TINYINT.replace_all(&result, "SMALLINT").to_string();
39        result = RE_SMALLINT.replace_all(&result, "SMALLINT").to_string();
40        result = RE_MEDIUMINT.replace_all(&result, "INTEGER").to_string();
41        result = RE_INT_SIZE.replace_all(&result, "INTEGER").to_string();
42        result = RE_BIGINT_SIZE.replace_all(&result, "BIGINT").to_string();
43
44        // Float types
45        result = RE_DOUBLE
46            .replace_all(&result, "DOUBLE PRECISION")
47            .to_string();
48        result = RE_FLOAT.replace_all(&result, "REAL").to_string();
49
50        // Text types
51        result = RE_LONGTEXT.replace_all(&result, "TEXT").to_string();
52        result = RE_MEDIUMTEXT.replace_all(&result, "TEXT").to_string();
53        result = RE_TINYTEXT.replace_all(&result, "TEXT").to_string();
54
55        // Binary types
56        result = RE_LONGBLOB.replace_all(&result, "BYTEA").to_string();
57        result = RE_MEDIUMBLOB.replace_all(&result, "BYTEA").to_string();
58        result = RE_TINYBLOB.replace_all(&result, "BYTEA").to_string();
59        result = RE_BLOB.replace_all(&result, "BYTEA").to_string();
60        result = RE_VARBINARY.replace_all(&result, "BYTEA").to_string();
61        result = RE_BINARY.replace_all(&result, "BYTEA").to_string();
62
63        // Date/time types
64        result = RE_DATETIME.replace_all(&result, "TIMESTAMP").to_string();
65
66        // JSON
67        result = RE_JSON.replace_all(&result, "JSONB").to_string();
68
69        // ENUM - convert to VARCHAR (with warning)
70        result = RE_ENUM.replace_all(&result, "VARCHAR(255)").to_string();
71
72        // SET - convert to VARCHAR (with warning)
73        result = RE_SET.replace_all(&result, "VARCHAR(255)").to_string();
74
75        // UNSIGNED - remove
76        result = RE_UNSIGNED.replace_all(&result, "").to_string();
77
78        // ZEROFILL - remove
79        result = RE_ZEROFILL.replace_all(&result, "").to_string();
80
81        result
82    }
83
84    /// Convert MySQL types to SQLite
85    fn mysql_to_sqlite(stmt: &str) -> String {
86        let mut result = stmt.to_string();
87
88        // SQLite is lenient with types, but we normalize for consistency
89
90        // Integer types - SQLite uses INTEGER
91        result = RE_TINYINT.replace_all(&result, "INTEGER").to_string();
92        result = RE_SMALLINT.replace_all(&result, "INTEGER").to_string();
93        result = RE_MEDIUMINT.replace_all(&result, "INTEGER").to_string();
94        result = RE_INT_SIZE.replace_all(&result, "INTEGER").to_string();
95        result = RE_BIGINT_SIZE.replace_all(&result, "INTEGER").to_string();
96
97        // Float types - SQLite uses REAL
98        result = RE_DOUBLE.replace_all(&result, "REAL").to_string();
99        result = RE_FLOAT.replace_all(&result, "REAL").to_string();
100        result = RE_DECIMAL.replace_all(&result, "REAL").to_string();
101
102        // Text types - all become TEXT
103        result = RE_LONGTEXT.replace_all(&result, "TEXT").to_string();
104        result = RE_MEDIUMTEXT.replace_all(&result, "TEXT").to_string();
105        result = RE_TINYTEXT.replace_all(&result, "TEXT").to_string();
106        result = RE_VARCHAR.replace_all(&result, "TEXT").to_string();
107        result = RE_CHAR.replace_all(&result, "TEXT").to_string();
108
109        // Binary types - SQLite uses BLOB
110        result = RE_LONGBLOB.replace_all(&result, "BLOB").to_string();
111        result = RE_MEDIUMBLOB.replace_all(&result, "BLOB").to_string();
112        result = RE_TINYBLOB.replace_all(&result, "BLOB").to_string();
113        result = RE_VARBINARY.replace_all(&result, "BLOB").to_string();
114        result = RE_BINARY.replace_all(&result, "BLOB").to_string();
115
116        // Date/time - SQLite stores as TEXT or INTEGER
117        result = RE_DATETIME.replace_all(&result, "TEXT").to_string();
118        result = RE_TIMESTAMP.replace_all(&result, "TEXT").to_string();
119        result = RE_DATE.replace_all(&result, "TEXT").to_string();
120        result = RE_TIME.replace_all(&result, "TEXT").to_string();
121
122        // JSON - SQLite stores as TEXT
123        result = RE_JSON.replace_all(&result, "TEXT").to_string();
124
125        // ENUM/SET - convert to TEXT
126        result = RE_ENUM.replace_all(&result, "TEXT").to_string();
127        result = RE_SET.replace_all(&result, "TEXT").to_string();
128
129        // UNSIGNED - remove
130        result = RE_UNSIGNED.replace_all(&result, "").to_string();
131
132        // ZEROFILL - remove
133        result = RE_ZEROFILL.replace_all(&result, "").to_string();
134
135        result
136    }
137
138    /// Convert PostgreSQL types to MySQL
139    fn postgres_to_mysql(stmt: &str) -> String {
140        let mut result = stmt.to_string();
141
142        // SERIAL → INT AUTO_INCREMENT
143        result = RE_BIGSERIAL
144            .replace_all(&result, "BIGINT AUTO_INCREMENT")
145            .to_string();
146        result = RE_SERIAL
147            .replace_all(&result, "INT AUTO_INCREMENT")
148            .to_string();
149        result = RE_SMALLSERIAL
150            .replace_all(&result, "SMALLINT AUTO_INCREMENT")
151            .to_string();
152
153        // BYTEA → LONGBLOB
154        result = RE_BYTEA.replace_all(&result, "LONGBLOB").to_string();
155
156        // DOUBLE PRECISION → DOUBLE
157        result = RE_DOUBLE_PRECISION
158            .replace_all(&result, "DOUBLE")
159            .to_string();
160
161        // REAL → FLOAT
162        result = RE_REAL.replace_all(&result, "FLOAT").to_string();
163
164        // BOOLEAN → TINYINT(1)
165        result = RE_BOOLEAN.replace_all(&result, "TINYINT(1)").to_string();
166
167        // TIMESTAMPTZ → DATETIME
168        result = RE_TIMESTAMPTZ.replace_all(&result, "DATETIME").to_string();
169
170        // TIMESTAMP WITH TIME ZONE → DATETIME
171        result = RE_TIMESTAMP_WITH_TZ
172            .replace_all(&result, "DATETIME")
173            .to_string();
174
175        // TIMESTAMP WITHOUT TIME ZONE → DATETIME
176        result = RE_TIMESTAMP_NO_TZ
177            .replace_all(&result, "DATETIME")
178            .to_string();
179
180        // JSONB → JSON
181        result = RE_JSONB.replace_all(&result, "JSON").to_string();
182
183        // UUID → VARCHAR(36)
184        result = RE_UUID.replace_all(&result, "VARCHAR(36)").to_string();
185
186        result
187    }
188
189    /// Convert PostgreSQL types to SQLite
190    fn postgres_to_sqlite(stmt: &str) -> String {
191        let mut result = stmt.to_string();
192
193        // SERIAL → INTEGER (SQLite auto-increments INTEGER PRIMARY KEY)
194        result = RE_BIGSERIAL.replace_all(&result, "INTEGER").to_string();
195        result = RE_SERIAL.replace_all(&result, "INTEGER").to_string();
196        result = RE_SMALLSERIAL.replace_all(&result, "INTEGER").to_string();
197
198        // BYTEA → BLOB
199        result = RE_BYTEA.replace_all(&result, "BLOB").to_string();
200
201        // DOUBLE PRECISION → REAL
202        result = RE_DOUBLE_PRECISION.replace_all(&result, "REAL").to_string();
203
204        // BOOLEAN → INTEGER
205        result = RE_BOOLEAN.replace_all(&result, "INTEGER").to_string();
206
207        // Timestamps → TEXT
208        result = RE_TIMESTAMPTZ.replace_all(&result, "TEXT").to_string();
209        result = RE_TIMESTAMP_WITH_TZ
210            .replace_all(&result, "TEXT")
211            .to_string();
212        result = RE_TIMESTAMP_NO_TZ.replace_all(&result, "TEXT").to_string();
213
214        // JSONB/JSON → TEXT
215        result = RE_JSONB.replace_all(&result, "TEXT").to_string();
216        result = RE_JSON.replace_all(&result, "TEXT").to_string();
217
218        // UUID → TEXT
219        result = RE_UUID.replace_all(&result, "TEXT").to_string();
220
221        // VARCHAR → TEXT
222        result = RE_VARCHAR.replace_all(&result, "TEXT").to_string();
223
224        result
225    }
226
227    /// Convert SQLite types to MySQL
228    fn sqlite_to_mysql(stmt: &str) -> String {
229        let mut result = stmt.to_string();
230
231        // SQLite uses TEXT for everything, but we can preserve some type info
232        // REAL → DOUBLE
233        result = RE_REAL.replace_all(&result, "DOUBLE").to_string();
234
235        // BLOB stays BLOB
236        // TEXT stays TEXT
237        // INTEGER stays INTEGER (MySQL will handle it)
238
239        result
240    }
241
242    /// Convert SQLite types to PostgreSQL
243    fn sqlite_to_postgres(stmt: &str) -> String {
244        let mut result = stmt.to_string();
245
246        // REAL → DOUBLE PRECISION
247        result = RE_REAL.replace_all(&result, "DOUBLE PRECISION").to_string();
248
249        // BLOB → BYTEA
250        result = RE_BLOB.replace_all(&result, "BYTEA").to_string();
251
252        // INTEGER stays INTEGER
253        // TEXT stays TEXT
254
255        result
256    }
257}
258
259// Pre-compiled regexes for type matching
260static RE_TINYINT_BOOL: Lazy<Regex> =
261    Lazy::new(|| Regex::new(r"(?i)\bTINYINT\s*\(\s*1\s*\)").unwrap());
262static RE_TINYINT: Lazy<Regex> =
263    Lazy::new(|| Regex::new(r"(?i)\bTINYINT\s*(\(\s*\d+\s*\))?").unwrap());
264static RE_SMALLINT: Lazy<Regex> =
265    Lazy::new(|| Regex::new(r"(?i)\bSMALLINT\s*(\(\s*\d+\s*\))?").unwrap());
266static RE_MEDIUMINT: Lazy<Regex> =
267    Lazy::new(|| Regex::new(r"(?i)\bMEDIUMINT\s*(\(\s*\d+\s*\))?").unwrap());
268static RE_INT_SIZE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bINT\s*\(\s*\d+\s*\)").unwrap());
269static RE_BIGINT_SIZE: Lazy<Regex> =
270    Lazy::new(|| Regex::new(r"(?i)\bBIGINT\s*\(\s*\d+\s*\)").unwrap());
271
272static RE_DOUBLE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bDOUBLE\b").unwrap());
273static RE_FLOAT: Lazy<Regex> =
274    Lazy::new(|| Regex::new(r"(?i)\bFLOAT\s*(\(\s*\d+\s*(,\s*\d+\s*)?\))?").unwrap());
275static RE_DECIMAL: Lazy<Regex> =
276    Lazy::new(|| Regex::new(r"(?i)\bDECIMAL\s*\(\s*\d+\s*(,\s*\d+\s*)?\)").unwrap());
277
278static RE_LONGTEXT: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bLONGTEXT\b").unwrap());
279static RE_MEDIUMTEXT: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bMEDIUMTEXT\b").unwrap());
280static RE_TINYTEXT: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bTINYTEXT\b").unwrap());
281static RE_VARCHAR: Lazy<Regex> =
282    Lazy::new(|| Regex::new(r"(?i)\bVARCHAR\s*\(\s*\d+\s*\)").unwrap());
283static RE_CHAR: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bCHAR\s*\(\s*\d+\s*\)").unwrap());
284
285static RE_LONGBLOB: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bLONGBLOB\b").unwrap());
286static RE_MEDIUMBLOB: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bMEDIUMBLOB\b").unwrap());
287static RE_TINYBLOB: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bTINYBLOB\b").unwrap());
288static RE_BLOB: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bBLOB\b").unwrap());
289static RE_VARBINARY: Lazy<Regex> =
290    Lazy::new(|| Regex::new(r"(?i)\bVARBINARY\s*\(\s*\d+\s*\)").unwrap());
291static RE_BINARY: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bBINARY\s*\(\s*\d+\s*\)").unwrap());
292
293static RE_DATETIME: Lazy<Regex> =
294    Lazy::new(|| Regex::new(r"(?i)\bDATETIME(\(\s*\d+\s*\))?").unwrap());
295static RE_TIMESTAMP: Lazy<Regex> =
296    Lazy::new(|| Regex::new(r"(?i)\bTIMESTAMP\s*(\(\s*\d+\s*\))?").unwrap());
297static RE_DATE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bDATE\b").unwrap());
298static RE_TIME: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bTIME\s*(\(\s*\d+\s*\))?").unwrap());
299
300static RE_JSON: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bJSON\b").unwrap());
301
302static RE_ENUM: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bENUM\s*\([^)]+\)").unwrap());
303static RE_SET: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bSET\s*\([^)]+\)").unwrap());
304
305static RE_UNSIGNED: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\s+UNSIGNED\b").unwrap());
306static RE_ZEROFILL: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\s+ZEROFILL\b").unwrap());
307
308// PostgreSQL specific types
309static RE_SERIAL: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bSERIAL\b").unwrap());
310static RE_BIGSERIAL: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bBIGSERIAL\b").unwrap());
311static RE_SMALLSERIAL: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bSMALLSERIAL\b").unwrap());
312static RE_BYTEA: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bBYTEA\b").unwrap());
313static RE_DOUBLE_PRECISION: Lazy<Regex> =
314    Lazy::new(|| Regex::new(r"(?i)\bDOUBLE\s+PRECISION\b").unwrap());
315static RE_REAL: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bREAL\b").unwrap());
316static RE_BOOLEAN: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bBOOLEAN\b").unwrap());
317static RE_TIMESTAMPTZ: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bTIMESTAMPTZ\b").unwrap());
318static RE_TIMESTAMP_WITH_TZ: Lazy<Regex> =
319    Lazy::new(|| Regex::new(r"(?i)\bTIMESTAMP\s+WITH\s+TIME\s+ZONE\b").unwrap());
320static RE_TIMESTAMP_NO_TZ: Lazy<Regex> =
321    Lazy::new(|| Regex::new(r"(?i)\bTIMESTAMP\s+WITHOUT\s+TIME\s+ZONE\b").unwrap());
322static RE_JSONB: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bJSONB\b").unwrap());
323static RE_UUID: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bUUID\b").unwrap());