Skip to main content

sql_splitter/convert/
types.rs

1//! Data type mapping between SQL dialects.
2//!
3//! Handles conversion of data types including:
4//! - Integer types (TINYINT, SMALLINT, INT, BIGINT)
5//! - Float types (FLOAT, DOUBLE, DECIMAL)
6//! - String types (CHAR, VARCHAR, TEXT)
7//! - Binary types (BLOB, BYTEA)
8//! - Date/time types (DATE, DATETIME, TIMESTAMP)
9//! - Special types (ENUM, SET, JSON)
10
11use crate::parser::SqlDialect;
12use once_cell::sync::Lazy;
13use regex::Regex;
14
15/// Type mapper for converting between dialects
16pub struct TypeMapper;
17
18impl TypeMapper {
19    /// Convert all data types in a statement
20    pub fn convert(stmt: &str, from: SqlDialect, to: SqlDialect) -> String {
21        match (from, to) {
22            (SqlDialect::MySql, SqlDialect::Postgres) => Self::mysql_to_postgres(stmt),
23            (SqlDialect::MySql, SqlDialect::Sqlite) => Self::mysql_to_sqlite(stmt),
24            (SqlDialect::MySql, SqlDialect::Mssql) => Self::mysql_to_mssql(stmt),
25            (SqlDialect::Postgres, SqlDialect::MySql) => Self::postgres_to_mysql(stmt),
26            (SqlDialect::Postgres, SqlDialect::Sqlite) => Self::postgres_to_sqlite(stmt),
27            (SqlDialect::Postgres, SqlDialect::Mssql) => Self::postgres_to_mssql(stmt),
28            (SqlDialect::Sqlite, SqlDialect::MySql) => Self::sqlite_to_mysql(stmt),
29            (SqlDialect::Sqlite, SqlDialect::Postgres) => Self::sqlite_to_postgres(stmt),
30            (SqlDialect::Sqlite, SqlDialect::Mssql) => Self::sqlite_to_mssql(stmt),
31            (SqlDialect::Mssql, SqlDialect::MySql) => Self::mssql_to_mysql(stmt),
32            (SqlDialect::Mssql, SqlDialect::Postgres) => Self::mssql_to_postgres(stmt),
33            (SqlDialect::Mssql, SqlDialect::Sqlite) => Self::mssql_to_sqlite(stmt),
34            _ => stmt.to_string(),
35        }
36    }
37
38    /// Convert MySQL types to PostgreSQL
39    fn mysql_to_postgres(stmt: &str) -> String {
40        let mut result = stmt.to_string();
41
42        // Integer types - strip display width, PostgreSQL doesn't use it
43        result = RE_TINYINT_BOOL.replace_all(&result, "BOOLEAN").to_string();
44        result = RE_TINYINT.replace_all(&result, "SMALLINT").to_string();
45        result = RE_SMALLINT.replace_all(&result, "SMALLINT").to_string();
46        result = RE_MEDIUMINT.replace_all(&result, "INTEGER").to_string();
47        result = RE_INT_SIZE.replace_all(&result, "INTEGER").to_string();
48        result = RE_BIGINT_SIZE.replace_all(&result, "BIGINT").to_string();
49
50        // Float types
51        result = RE_DOUBLE
52            .replace_all(&result, "DOUBLE PRECISION")
53            .to_string();
54        result = RE_FLOAT.replace_all(&result, "REAL").to_string();
55
56        // Text types
57        result = RE_LONGTEXT.replace_all(&result, "TEXT").to_string();
58        result = RE_MEDIUMTEXT.replace_all(&result, "TEXT").to_string();
59        result = RE_TINYTEXT.replace_all(&result, "TEXT").to_string();
60
61        // Binary types
62        result = RE_LONGBLOB.replace_all(&result, "BYTEA").to_string();
63        result = RE_MEDIUMBLOB.replace_all(&result, "BYTEA").to_string();
64        result = RE_TINYBLOB.replace_all(&result, "BYTEA").to_string();
65        result = RE_BLOB.replace_all(&result, "BYTEA").to_string();
66        result = RE_VARBINARY.replace_all(&result, "BYTEA").to_string();
67        result = RE_BINARY.replace_all(&result, "BYTEA").to_string();
68
69        // Date/time types
70        result = RE_DATETIME.replace_all(&result, "TIMESTAMP").to_string();
71
72        // JSON
73        result = RE_JSON.replace_all(&result, "JSONB").to_string();
74
75        // ENUM - convert to VARCHAR (with warning)
76        result = RE_ENUM.replace_all(&result, "VARCHAR(255)").to_string();
77
78        // SET - convert to VARCHAR (with warning)
79        result = RE_SET.replace_all(&result, "VARCHAR(255)").to_string();
80
81        // UNSIGNED - remove
82        result = RE_UNSIGNED.replace_all(&result, "").to_string();
83
84        // ZEROFILL - remove
85        result = RE_ZEROFILL.replace_all(&result, "").to_string();
86
87        result
88    }
89
90    /// Convert MySQL types to SQLite
91    fn mysql_to_sqlite(stmt: &str) -> String {
92        let mut result = stmt.to_string();
93
94        // SQLite is lenient with types, but we normalize for consistency
95
96        // Integer types - SQLite uses INTEGER
97        result = RE_TINYINT.replace_all(&result, "INTEGER").to_string();
98        result = RE_SMALLINT.replace_all(&result, "INTEGER").to_string();
99        result = RE_MEDIUMINT.replace_all(&result, "INTEGER").to_string();
100        result = RE_INT_SIZE.replace_all(&result, "INTEGER").to_string();
101        result = RE_BIGINT_SIZE.replace_all(&result, "INTEGER").to_string();
102
103        // Float types - SQLite uses REAL
104        result = RE_DOUBLE.replace_all(&result, "REAL").to_string();
105        result = RE_FLOAT.replace_all(&result, "REAL").to_string();
106        result = RE_DECIMAL.replace_all(&result, "REAL").to_string();
107
108        // Text types - all become TEXT
109        result = RE_LONGTEXT.replace_all(&result, "TEXT").to_string();
110        result = RE_MEDIUMTEXT.replace_all(&result, "TEXT").to_string();
111        result = RE_TINYTEXT.replace_all(&result, "TEXT").to_string();
112        result = RE_VARCHAR.replace_all(&result, "TEXT").to_string();
113        result = RE_CHAR.replace_all(&result, "TEXT").to_string();
114
115        // Binary types - SQLite uses BLOB
116        result = RE_LONGBLOB.replace_all(&result, "BLOB").to_string();
117        result = RE_MEDIUMBLOB.replace_all(&result, "BLOB").to_string();
118        result = RE_TINYBLOB.replace_all(&result, "BLOB").to_string();
119        result = RE_VARBINARY.replace_all(&result, "BLOB").to_string();
120        result = RE_BINARY.replace_all(&result, "BLOB").to_string();
121
122        // Date/time - SQLite stores as TEXT or INTEGER
123        result = RE_DATETIME.replace_all(&result, "TEXT").to_string();
124        result = RE_TIMESTAMP.replace_all(&result, "TEXT").to_string();
125        result = RE_DATE.replace_all(&result, "TEXT").to_string();
126        result = RE_TIME.replace_all(&result, "TEXT").to_string();
127
128        // JSON - SQLite stores as TEXT
129        result = RE_JSON.replace_all(&result, "TEXT").to_string();
130
131        // ENUM/SET - convert to TEXT
132        result = RE_ENUM.replace_all(&result, "TEXT").to_string();
133        result = RE_SET.replace_all(&result, "TEXT").to_string();
134
135        // UNSIGNED - remove
136        result = RE_UNSIGNED.replace_all(&result, "").to_string();
137
138        // ZEROFILL - remove
139        result = RE_ZEROFILL.replace_all(&result, "").to_string();
140
141        result
142    }
143
144    /// Convert PostgreSQL types to MySQL
145    fn postgres_to_mysql(stmt: &str) -> String {
146        let mut result = stmt.to_string();
147
148        // SERIAL → INT AUTO_INCREMENT
149        result = RE_BIGSERIAL
150            .replace_all(&result, "BIGINT AUTO_INCREMENT")
151            .to_string();
152        result = RE_SERIAL
153            .replace_all(&result, "INT AUTO_INCREMENT")
154            .to_string();
155        result = RE_SMALLSERIAL
156            .replace_all(&result, "SMALLINT AUTO_INCREMENT")
157            .to_string();
158
159        // BYTEA → LONGBLOB
160        result = RE_BYTEA.replace_all(&result, "LONGBLOB").to_string();
161
162        // DOUBLE PRECISION → DOUBLE
163        result = RE_DOUBLE_PRECISION
164            .replace_all(&result, "DOUBLE")
165            .to_string();
166
167        // REAL → FLOAT
168        result = RE_REAL.replace_all(&result, "FLOAT").to_string();
169
170        // BOOLEAN → TINYINT(1)
171        result = RE_BOOLEAN.replace_all(&result, "TINYINT(1)").to_string();
172
173        // TIMESTAMPTZ → DATETIME
174        result = RE_TIMESTAMPTZ.replace_all(&result, "DATETIME").to_string();
175
176        // TIMESTAMP WITH TIME ZONE → DATETIME
177        result = RE_TIMESTAMP_WITH_TZ
178            .replace_all(&result, "DATETIME")
179            .to_string();
180
181        // TIMESTAMP WITHOUT TIME ZONE → DATETIME
182        result = RE_TIMESTAMP_NO_TZ
183            .replace_all(&result, "DATETIME")
184            .to_string();
185
186        // JSONB → JSON
187        result = RE_JSONB.replace_all(&result, "JSON").to_string();
188
189        // UUID → VARCHAR(36)
190        result = RE_UUID.replace_all(&result, "VARCHAR(36)").to_string();
191
192        result
193    }
194
195    /// Convert PostgreSQL types to SQLite
196    fn postgres_to_sqlite(stmt: &str) -> String {
197        let mut result = stmt.to_string();
198
199        // SERIAL → INTEGER (SQLite auto-increments INTEGER PRIMARY KEY)
200        result = RE_BIGSERIAL.replace_all(&result, "INTEGER").to_string();
201        result = RE_SERIAL.replace_all(&result, "INTEGER").to_string();
202        result = RE_SMALLSERIAL.replace_all(&result, "INTEGER").to_string();
203
204        // BYTEA → BLOB
205        result = RE_BYTEA.replace_all(&result, "BLOB").to_string();
206
207        // DOUBLE PRECISION → REAL
208        result = RE_DOUBLE_PRECISION.replace_all(&result, "REAL").to_string();
209
210        // BOOLEAN → INTEGER
211        result = RE_BOOLEAN.replace_all(&result, "INTEGER").to_string();
212
213        // Timestamps → TEXT
214        result = RE_TIMESTAMPTZ.replace_all(&result, "TEXT").to_string();
215        result = RE_TIMESTAMP_WITH_TZ
216            .replace_all(&result, "TEXT")
217            .to_string();
218        result = RE_TIMESTAMP_NO_TZ.replace_all(&result, "TEXT").to_string();
219
220        // JSONB/JSON → TEXT
221        result = RE_JSONB.replace_all(&result, "TEXT").to_string();
222        result = RE_JSON.replace_all(&result, "TEXT").to_string();
223
224        // UUID → TEXT
225        result = RE_UUID.replace_all(&result, "TEXT").to_string();
226
227        // VARCHAR → TEXT
228        result = RE_VARCHAR.replace_all(&result, "TEXT").to_string();
229
230        result
231    }
232
233    /// Convert SQLite types to MySQL
234    fn sqlite_to_mysql(stmt: &str) -> String {
235        let mut result = stmt.to_string();
236
237        // SQLite uses TEXT for everything, but we can preserve some type info
238        // REAL → DOUBLE
239        result = RE_REAL.replace_all(&result, "DOUBLE").to_string();
240
241        // BLOB stays BLOB
242        // TEXT stays TEXT
243        // INTEGER stays INTEGER (MySQL will handle it)
244
245        result
246    }
247
248    /// Convert SQLite types to PostgreSQL
249    fn sqlite_to_postgres(stmt: &str) -> String {
250        let mut result = stmt.to_string();
251
252        // REAL → DOUBLE PRECISION
253        result = RE_REAL.replace_all(&result, "DOUBLE PRECISION").to_string();
254
255        // BLOB → BYTEA
256        result = RE_BLOB.replace_all(&result, "BYTEA").to_string();
257
258        // INTEGER stays INTEGER
259        // TEXT stays TEXT
260
261        result
262    }
263
264    /// Convert MySQL types to MSSQL
265    fn mysql_to_mssql(stmt: &str) -> String {
266        let mut result = stmt.to_string();
267
268        // AUTO_INCREMENT → IDENTITY(1,1) (handled elsewhere in convert_auto_increment)
269
270        // Integer types - strip display width
271        result = RE_TINYINT_BOOL.replace_all(&result, "BIT").to_string();
272        result = RE_TINYINT.replace_all(&result, "TINYINT").to_string();
273        result = RE_SMALLINT.replace_all(&result, "SMALLINT").to_string();
274        result = RE_MEDIUMINT.replace_all(&result, "INT").to_string();
275        result = RE_INT_SIZE.replace_all(&result, "INT").to_string();
276        result = RE_BIGINT_SIZE.replace_all(&result, "BIGINT").to_string();
277
278        // Float types
279        result = RE_DOUBLE.replace_all(&result, "FLOAT").to_string();
280        result = RE_FLOAT.replace_all(&result, "REAL").to_string();
281
282        // Text types
283        result = RE_LONGTEXT
284            .replace_all(&result, "NVARCHAR(MAX)")
285            .to_string();
286        result = RE_MEDIUMTEXT
287            .replace_all(&result, "NVARCHAR(MAX)")
288            .to_string();
289        result = RE_TINYTEXT
290            .replace_all(&result, "NVARCHAR(255)")
291            .to_string();
292
293        // Binary types
294        result = RE_LONGBLOB
295            .replace_all(&result, "VARBINARY(MAX)")
296            .to_string();
297        result = RE_MEDIUMBLOB
298            .replace_all(&result, "VARBINARY(MAX)")
299            .to_string();
300        result = RE_TINYBLOB
301            .replace_all(&result, "VARBINARY(255)")
302            .to_string();
303        result = RE_BLOB.replace_all(&result, "VARBINARY(MAX)").to_string();
304
305        // Date/time types
306        result = RE_DATETIME.replace_all(&result, "DATETIME2").to_string();
307
308        // JSON → NVARCHAR(MAX)
309        result = RE_JSON.replace_all(&result, "NVARCHAR(MAX)").to_string();
310
311        // ENUM → NVARCHAR(255)
312        result = RE_ENUM.replace_all(&result, "NVARCHAR(255)").to_string();
313
314        // SET → NVARCHAR(255)
315        result = RE_SET.replace_all(&result, "NVARCHAR(255)").to_string();
316
317        // UNSIGNED - remove
318        result = RE_UNSIGNED.replace_all(&result, "").to_string();
319
320        // ZEROFILL - remove
321        result = RE_ZEROFILL.replace_all(&result, "").to_string();
322
323        result
324    }
325
326    /// Convert PostgreSQL types to MSSQL
327    fn postgres_to_mssql(stmt: &str) -> String {
328        let mut result = stmt.to_string();
329
330        // SERIAL → INT IDENTITY(1,1)
331        result = RE_BIGSERIAL
332            .replace_all(&result, "BIGINT IDENTITY(1,1)")
333            .to_string();
334        result = RE_SERIAL
335            .replace_all(&result, "INT IDENTITY(1,1)")
336            .to_string();
337        result = RE_SMALLSERIAL
338            .replace_all(&result, "SMALLINT IDENTITY(1,1)")
339            .to_string();
340
341        // BYTEA → VARBINARY(MAX)
342        result = RE_BYTEA.replace_all(&result, "VARBINARY(MAX)").to_string();
343
344        // DOUBLE PRECISION → FLOAT
345        result = RE_DOUBLE_PRECISION
346            .replace_all(&result, "FLOAT")
347            .to_string();
348
349        // REAL stays REAL
350
351        // BOOLEAN → BIT
352        result = RE_BOOLEAN.replace_all(&result, "BIT").to_string();
353
354        // TIMESTAMPTZ → DATETIMEOFFSET
355        result = RE_TIMESTAMPTZ
356            .replace_all(&result, "DATETIMEOFFSET")
357            .to_string();
358
359        // TIMESTAMP WITH TIME ZONE → DATETIMEOFFSET
360        result = RE_TIMESTAMP_WITH_TZ
361            .replace_all(&result, "DATETIMEOFFSET")
362            .to_string();
363
364        // TIMESTAMP WITHOUT TIME ZONE → DATETIME2
365        result = RE_TIMESTAMP_NO_TZ
366            .replace_all(&result, "DATETIME2")
367            .to_string();
368
369        // JSONB → NVARCHAR(MAX)
370        result = RE_JSONB.replace_all(&result, "NVARCHAR(MAX)").to_string();
371
372        // JSON → NVARCHAR(MAX)
373        result = RE_JSON.replace_all(&result, "NVARCHAR(MAX)").to_string();
374
375        // UUID → UNIQUEIDENTIFIER
376        result = RE_UUID.replace_all(&result, "UNIQUEIDENTIFIER").to_string();
377
378        // TEXT → NVARCHAR(MAX)
379        result = RE_TEXT.replace_all(&result, "NVARCHAR(MAX)").to_string();
380
381        result
382    }
383
384    /// Convert SQLite types to MSSQL
385    fn sqlite_to_mssql(stmt: &str) -> String {
386        let mut result = stmt.to_string();
387
388        // REAL → FLOAT
389        result = RE_REAL.replace_all(&result, "FLOAT").to_string();
390
391        // BLOB → VARBINARY(MAX)
392        result = RE_BLOB.replace_all(&result, "VARBINARY(MAX)").to_string();
393
394        // TEXT → NVARCHAR(MAX)
395        result = RE_TEXT.replace_all(&result, "NVARCHAR(MAX)").to_string();
396
397        result
398    }
399
400    /// Convert MSSQL types to MySQL
401    fn mssql_to_mysql(stmt: &str) -> String {
402        let mut result = stmt.to_string();
403
404        // IDENTITY → AUTO_INCREMENT (handled elsewhere)
405
406        // BIT → TINYINT(1)
407        result = RE_BIT.replace_all(&result, "TINYINT(1)").to_string();
408
409        // NVARCHAR(MAX) → LONGTEXT
410        result = RE_NVARCHAR_MAX.replace_all(&result, "LONGTEXT").to_string();
411
412        // NVARCHAR(n) → VARCHAR(n)
413        result = RE_NVARCHAR.replace_all(&result, "VARCHAR$1").to_string();
414
415        // NCHAR(n) → CHAR(n)
416        result = RE_NCHAR.replace_all(&result, "CHAR$1").to_string();
417
418        // NTEXT → LONGTEXT
419        result = RE_NTEXT.replace_all(&result, "LONGTEXT").to_string();
420
421        // VARCHAR(MAX) → LONGTEXT
422        result = RE_VARCHAR_MAX.replace_all(&result, "LONGTEXT").to_string();
423
424        // VARBINARY(MAX) → LONGBLOB
425        result = RE_VARBINARY_MAX
426            .replace_all(&result, "LONGBLOB")
427            .to_string();
428
429        // IMAGE → LONGBLOB
430        result = RE_IMAGE.replace_all(&result, "LONGBLOB").to_string();
431
432        // DATETIME2 → DATETIME(6)
433        result = RE_DATETIME2.replace_all(&result, "DATETIME(6)").to_string();
434
435        // DATETIMEOFFSET → DATETIME
436        result = RE_DATETIMEOFFSET
437            .replace_all(&result, "DATETIME")
438            .to_string();
439
440        // SMALLDATETIME → DATETIME
441        result = RE_SMALLDATETIME
442            .replace_all(&result, "DATETIME")
443            .to_string();
444
445        // MONEY → DECIMAL(19,4)
446        result = RE_MONEY.replace_all(&result, "DECIMAL(19,4)").to_string();
447
448        // SMALLMONEY → DECIMAL(10,4)
449        result = RE_SMALLMONEY
450            .replace_all(&result, "DECIMAL(10,4)")
451            .to_string();
452
453        // UNIQUEIDENTIFIER → VARCHAR(36)
454        result = RE_UNIQUEIDENTIFIER
455            .replace_all(&result, "VARCHAR(36)")
456            .to_string();
457
458        // XML → LONGTEXT
459        result = RE_XML.replace_all(&result, "LONGTEXT").to_string();
460
461        // ROWVERSION/MSSQL TIMESTAMP → BINARY(8)
462        result = RE_MSSQL_TIMESTAMP_BRACKETED
463            .replace_all(&result, "BINARY(8)")
464            .to_string();
465        result = RE_ROWVERSION_ONLY
466            .replace_all(&result, "BINARY(8)")
467            .to_string();
468
469        // Strip MSSQL-specific clauses
470        result = RE_ON_PRIMARY.replace_all(&result, "").to_string();
471        result = RE_CLUSTERED.replace_all(&result, "").to_string();
472        result = RE_NONCLUSTERED.replace_all(&result, "").to_string();
473
474        result
475    }
476
477    /// Convert MSSQL types to PostgreSQL
478    fn mssql_to_postgres(stmt: &str) -> String {
479        let mut result = stmt.to_string();
480
481        // IDENTITY → SERIAL (handled elsewhere)
482
483        // IMPORTANT: Handle ROWVERSION first (before any TIMESTAMP conversion)
484        // In MSSQL, TIMESTAMP is an alias for ROWVERSION (a binary type, not datetime!)
485        // Use a more specific regex that matches MSSQL TIMESTAMP but not PostgreSQL TIMESTAMP
486        result = RE_MSSQL_TIMESTAMP_BRACKETED
487            .replace_all(&result, "BYTEA")
488            .to_string();
489        result = RE_ROWVERSION_ONLY.replace_all(&result, "BYTEA").to_string();
490
491        // BIT → BOOLEAN
492        result = RE_BIT.replace_all(&result, "BOOLEAN").to_string();
493
494        // NVARCHAR(MAX) → TEXT
495        result = RE_NVARCHAR_MAX.replace_all(&result, "TEXT").to_string();
496
497        // NVARCHAR(n) → VARCHAR(n)
498        result = RE_NVARCHAR.replace_all(&result, "VARCHAR$1").to_string();
499
500        // NCHAR(n) → CHAR(n)
501        result = RE_NCHAR.replace_all(&result, "CHAR$1").to_string();
502
503        // NTEXT → TEXT
504        result = RE_NTEXT.replace_all(&result, "TEXT").to_string();
505
506        // VARCHAR(MAX) → TEXT
507        result = RE_VARCHAR_MAX.replace_all(&result, "TEXT").to_string();
508
509        // VARBINARY(MAX) → BYTEA
510        result = RE_VARBINARY_MAX.replace_all(&result, "BYTEA").to_string();
511
512        // VARBINARY(n) → BYTEA
513        result = RE_VARBINARY.replace_all(&result, "BYTEA").to_string();
514
515        // IMAGE → BYTEA
516        result = RE_IMAGE.replace_all(&result, "BYTEA").to_string();
517
518        // DATETIME2 → TIMESTAMP
519        result = RE_DATETIME2.replace_all(&result, "TIMESTAMP").to_string();
520
521        // DATETIME → TIMESTAMP (but not MSSQL TIMESTAMP which is already converted)
522        result = RE_DATETIME.replace_all(&result, "TIMESTAMP").to_string();
523
524        // DATETIMEOFFSET → TIMESTAMPTZ
525        result = RE_DATETIMEOFFSET
526            .replace_all(&result, "TIMESTAMPTZ")
527            .to_string();
528
529        // SMALLDATETIME → TIMESTAMP
530        result = RE_SMALLDATETIME
531            .replace_all(&result, "TIMESTAMP")
532            .to_string();
533
534        // MONEY → DECIMAL(19,4)
535        result = RE_MONEY.replace_all(&result, "DECIMAL(19,4)").to_string();
536
537        // SMALLMONEY → DECIMAL(10,4)
538        result = RE_SMALLMONEY
539            .replace_all(&result, "DECIMAL(10,4)")
540            .to_string();
541
542        // UNIQUEIDENTIFIER → UUID
543        result = RE_UNIQUEIDENTIFIER.replace_all(&result, "UUID").to_string();
544
545        // XML → XML (PostgreSQL supports XML type)
546
547        // FLOAT → DOUBLE PRECISION
548        result = RE_FLOAT
549            .replace_all(&result, "DOUBLE PRECISION")
550            .to_string();
551
552        // Strip MSSQL-specific clauses
553        result = RE_ON_PRIMARY.replace_all(&result, "").to_string();
554        result = RE_CLUSTERED.replace_all(&result, "").to_string();
555        result = RE_NONCLUSTERED.replace_all(&result, "").to_string();
556
557        result
558    }
559
560    /// Convert MSSQL types to SQLite
561    fn mssql_to_sqlite(stmt: &str) -> String {
562        let mut result = stmt.to_string();
563
564        // BIT → INTEGER
565        result = RE_BIT.replace_all(&result, "INTEGER").to_string();
566
567        // NVARCHAR → TEXT
568        result = RE_NVARCHAR_MAX.replace_all(&result, "TEXT").to_string();
569        result = RE_NVARCHAR.replace_all(&result, "TEXT").to_string();
570
571        // NCHAR → TEXT
572        result = RE_NCHAR.replace_all(&result, "TEXT").to_string();
573
574        // NTEXT → TEXT
575        result = RE_NTEXT.replace_all(&result, "TEXT").to_string();
576
577        // VARCHAR(MAX) → TEXT
578        result = RE_VARCHAR_MAX.replace_all(&result, "TEXT").to_string();
579
580        // VARBINARY → BLOB
581        result = RE_VARBINARY_MAX.replace_all(&result, "BLOB").to_string();
582        result = RE_VARBINARY.replace_all(&result, "BLOB").to_string();
583
584        // IMAGE → BLOB
585        result = RE_IMAGE.replace_all(&result, "BLOB").to_string();
586
587        // Date/time → TEXT
588        result = RE_DATETIME2.replace_all(&result, "TEXT").to_string();
589        result = RE_DATETIME.replace_all(&result, "TEXT").to_string();
590        result = RE_DATETIMEOFFSET.replace_all(&result, "TEXT").to_string();
591        result = RE_SMALLDATETIME.replace_all(&result, "TEXT").to_string();
592
593        // MONEY → REAL
594        result = RE_MONEY.replace_all(&result, "REAL").to_string();
595        result = RE_SMALLMONEY.replace_all(&result, "REAL").to_string();
596
597        // UNIQUEIDENTIFIER → TEXT
598        result = RE_UNIQUEIDENTIFIER.replace_all(&result, "TEXT").to_string();
599
600        // XML → TEXT
601        result = RE_XML.replace_all(&result, "TEXT").to_string();
602
603        // ROWVERSION/MSSQL TIMESTAMP → BLOB
604        result = RE_MSSQL_TIMESTAMP_BRACKETED
605            .replace_all(&result, "BLOB")
606            .to_string();
607        result = RE_ROWVERSION_ONLY.replace_all(&result, "BLOB").to_string();
608
609        // FLOAT → REAL
610        result = RE_FLOAT.replace_all(&result, "REAL").to_string();
611
612        // Strip MSSQL-specific clauses
613        result = RE_ON_PRIMARY.replace_all(&result, "").to_string();
614        result = RE_CLUSTERED.replace_all(&result, "").to_string();
615        result = RE_NONCLUSTERED.replace_all(&result, "").to_string();
616
617        result
618    }
619}
620
621// Pre-compiled regexes for type matching
622static RE_TINYINT_BOOL: Lazy<Regex> =
623    Lazy::new(|| Regex::new(r"(?i)\bTINYINT\s*\(\s*1\s*\)").unwrap());
624static RE_TINYINT: Lazy<Regex> =
625    Lazy::new(|| Regex::new(r"(?i)\bTINYINT\s*(\(\s*\d+\s*\))?").unwrap());
626static RE_SMALLINT: Lazy<Regex> =
627    Lazy::new(|| Regex::new(r"(?i)\bSMALLINT\s*(\(\s*\d+\s*\))?").unwrap());
628static RE_MEDIUMINT: Lazy<Regex> =
629    Lazy::new(|| Regex::new(r"(?i)\bMEDIUMINT\s*(\(\s*\d+\s*\))?").unwrap());
630static RE_INT_SIZE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bINT\s*\(\s*\d+\s*\)").unwrap());
631static RE_BIGINT_SIZE: Lazy<Regex> =
632    Lazy::new(|| Regex::new(r"(?i)\bBIGINT\s*\(\s*\d+\s*\)").unwrap());
633
634static RE_DOUBLE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bDOUBLE\b").unwrap());
635static RE_FLOAT: Lazy<Regex> =
636    Lazy::new(|| Regex::new(r"(?i)\bFLOAT\s*(\(\s*\d+\s*(,\s*\d+\s*)?\))?").unwrap());
637static RE_DECIMAL: Lazy<Regex> =
638    Lazy::new(|| Regex::new(r"(?i)\bDECIMAL\s*\(\s*\d+\s*(,\s*\d+\s*)?\)").unwrap());
639
640static RE_LONGTEXT: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bLONGTEXT\b").unwrap());
641static RE_MEDIUMTEXT: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bMEDIUMTEXT\b").unwrap());
642static RE_TINYTEXT: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bTINYTEXT\b").unwrap());
643static RE_VARCHAR: Lazy<Regex> =
644    Lazy::new(|| Regex::new(r"(?i)\bVARCHAR\s*\(\s*\d+\s*\)").unwrap());
645static RE_CHAR: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bCHAR\s*\(\s*\d+\s*\)").unwrap());
646
647static RE_LONGBLOB: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bLONGBLOB\b").unwrap());
648static RE_MEDIUMBLOB: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bMEDIUMBLOB\b").unwrap());
649static RE_TINYBLOB: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bTINYBLOB\b").unwrap());
650static RE_BLOB: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bBLOB\b").unwrap());
651static RE_VARBINARY: Lazy<Regex> =
652    Lazy::new(|| Regex::new(r"(?i)\bVARBINARY\s*\(\s*\d+\s*\)").unwrap());
653static RE_BINARY: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bBINARY\s*\(\s*\d+\s*\)").unwrap());
654
655static RE_DATETIME: Lazy<Regex> =
656    Lazy::new(|| Regex::new(r"(?i)\bDATETIME(\(\s*\d+\s*\))?").unwrap());
657static RE_TIMESTAMP: Lazy<Regex> =
658    Lazy::new(|| Regex::new(r"(?i)\bTIMESTAMP\s*(\(\s*\d+\s*\))?").unwrap());
659static RE_DATE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bDATE\b").unwrap());
660static RE_TIME: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bTIME\s*(\(\s*\d+\s*\))?").unwrap());
661
662static RE_JSON: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bJSON\b").unwrap());
663
664static RE_ENUM: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bENUM\s*\([^)]+\)").unwrap());
665static RE_SET: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bSET\s*\([^)]+\)").unwrap());
666
667static RE_UNSIGNED: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\s+UNSIGNED\b").unwrap());
668static RE_ZEROFILL: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\s+ZEROFILL\b").unwrap());
669
670// PostgreSQL specific types
671static RE_SERIAL: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bSERIAL\b").unwrap());
672static RE_BIGSERIAL: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bBIGSERIAL\b").unwrap());
673static RE_SMALLSERIAL: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bSMALLSERIAL\b").unwrap());
674static RE_BYTEA: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bBYTEA\b").unwrap());
675static RE_DOUBLE_PRECISION: Lazy<Regex> =
676    Lazy::new(|| Regex::new(r"(?i)\bDOUBLE\s+PRECISION\b").unwrap());
677static RE_REAL: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bREAL\b").unwrap());
678static RE_BOOLEAN: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bBOOLEAN\b").unwrap());
679static RE_TIMESTAMPTZ: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bTIMESTAMPTZ\b").unwrap());
680static RE_TIMESTAMP_WITH_TZ: Lazy<Regex> =
681    Lazy::new(|| Regex::new(r"(?i)\bTIMESTAMP\s+WITH\s+TIME\s+ZONE\b").unwrap());
682static RE_TIMESTAMP_NO_TZ: Lazy<Regex> =
683    Lazy::new(|| Regex::new(r"(?i)\bTIMESTAMP\s+WITHOUT\s+TIME\s+ZONE\b").unwrap());
684static RE_JSONB: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bJSONB\b").unwrap());
685static RE_UUID: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bUUID\b").unwrap());
686static RE_TEXT: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bTEXT\b").unwrap());
687
688// MSSQL specific types
689static RE_BIT: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bBIT\b").unwrap());
690static RE_NVARCHAR_MAX: Lazy<Regex> =
691    Lazy::new(|| Regex::new(r"(?i)\bNVARCHAR\s*\(\s*MAX\s*\)").unwrap());
692static RE_NVARCHAR: Lazy<Regex> =
693    Lazy::new(|| Regex::new(r"(?i)\bNVARCHAR\s*(\(\s*\d+\s*\))").unwrap());
694static RE_NCHAR: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bNCHAR\s*(\(\s*\d+\s*\))").unwrap());
695static RE_NTEXT: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bNTEXT\b").unwrap());
696static RE_VARCHAR_MAX: Lazy<Regex> =
697    Lazy::new(|| Regex::new(r"(?i)\bVARCHAR\s*\(\s*MAX\s*\)").unwrap());
698static RE_VARBINARY_MAX: Lazy<Regex> =
699    Lazy::new(|| Regex::new(r"(?i)\bVARBINARY\s*\(\s*MAX\s*\)").unwrap());
700static RE_IMAGE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bIMAGE\b").unwrap());
701static RE_DATETIME2: Lazy<Regex> =
702    Lazy::new(|| Regex::new(r"(?i)\bDATETIME2\s*(\(\s*\d+\s*\))?").unwrap());
703static RE_DATETIMEOFFSET: Lazy<Regex> =
704    Lazy::new(|| Regex::new(r"(?i)\bDATETIMEOFFSET\s*(\(\s*\d+\s*\))?").unwrap());
705static RE_SMALLDATETIME: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bSMALLDATETIME\b").unwrap());
706static RE_MONEY: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bMONEY\b").unwrap());
707static RE_SMALLMONEY: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bSMALLMONEY\b").unwrap());
708static RE_UNIQUEIDENTIFIER: Lazy<Regex> =
709    Lazy::new(|| Regex::new(r"(?i)\bUNIQUEIDENTIFIER\b").unwrap());
710static RE_XML: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bXML\b").unwrap());
711// MSSQL TIMESTAMP type (binary versioning, NOT datetime) - only match bracketed [TIMESTAMP]
712// or as column type after brackets. We can't match unbracketed standalone TIMESTAMP safely
713// because it would conflict with PostgreSQL TIMESTAMP result. So we rely on context.
714static RE_MSSQL_TIMESTAMP_BRACKETED: Lazy<Regex> =
715    Lazy::new(|| Regex::new(r"(?i)\[\s*TIMESTAMP\s*\]").unwrap());
716static RE_ROWVERSION_ONLY: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bROWVERSION\b").unwrap());
717
718// MSSQL-specific clauses to strip when converting to other dialects
719static RE_ON_PRIMARY: Lazy<Regex> =
720    Lazy::new(|| Regex::new(r"(?i)\s*ON\s*\[\s*PRIMARY\s*\]").unwrap());
721static RE_CLUSTERED: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bCLUSTERED\s+").unwrap());
722static RE_NONCLUSTERED: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\bNONCLUSTERED\s+").unwrap());