1use once_cell::sync::Lazy;
4use regex::Regex;
5
6#[derive(Debug, Clone, PartialEq)]
8pub enum DuckDBType {
9 Boolean,
10 TinyInt,
11 SmallInt,
12 Integer,
13 BigInt,
14 Float,
15 Double,
16 Decimal(Option<u8>, Option<u8>),
17 Varchar(Option<u32>),
18 Text,
19 Blob,
20 Date,
21 Time,
22 Timestamp,
23 Interval,
24 Uuid,
25 Json,
26}
27
28impl std::fmt::Display for DuckDBType {
29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30 match self {
31 DuckDBType::Boolean => write!(f, "BOOLEAN"),
32 DuckDBType::TinyInt => write!(f, "TINYINT"),
33 DuckDBType::SmallInt => write!(f, "SMALLINT"),
34 DuckDBType::Integer => write!(f, "INTEGER"),
35 DuckDBType::BigInt => write!(f, "BIGINT"),
36 DuckDBType::Float => write!(f, "FLOAT"),
37 DuckDBType::Double => write!(f, "DOUBLE"),
38 DuckDBType::Decimal(p, s) => match (p, s) {
39 (Some(p), Some(s)) => write!(f, "DECIMAL({}, {})", p, s),
40 (Some(p), None) => write!(f, "DECIMAL({})", p),
41 _ => write!(f, "DECIMAL"),
42 },
43 DuckDBType::Varchar(len) => match len {
44 Some(n) => write!(f, "VARCHAR({})", n),
45 None => write!(f, "VARCHAR"),
46 },
47 DuckDBType::Text => write!(f, "TEXT"),
48 DuckDBType::Blob => write!(f, "BLOB"),
49 DuckDBType::Date => write!(f, "DATE"),
50 DuckDBType::Time => write!(f, "TIME"),
51 DuckDBType::Timestamp => write!(f, "TIMESTAMP"),
52 DuckDBType::Interval => write!(f, "INTERVAL"),
53 DuckDBType::Uuid => write!(f, "UUID"),
54 DuckDBType::Json => write!(f, "JSON"),
55 }
56 }
57}
58
59pub struct TypeConverter;
61
62impl TypeConverter {
63 pub fn convert(type_str: &str) -> String {
65 let upper = type_str.to_uppercase();
66 let trimmed = upper.trim();
67
68 match trimmed {
70 "BOOL" | "BOOLEAN" => "BOOLEAN".to_string(),
72 "TINYINT(1)" => "BOOLEAN".to_string(),
73
74 "TINYINT" => "TINYINT".to_string(),
76 "SMALLINT" => "SMALLINT".to_string(),
77 "MEDIUMINT" => "INTEGER".to_string(),
78 "INT" | "INTEGER" => "INTEGER".to_string(),
79 "BIGINT" => "BIGINT".to_string(),
80
81 "TINYINT UNSIGNED" => "UTINYINT".to_string(),
83 "SMALLINT UNSIGNED" => "USMALLINT".to_string(),
84 "MEDIUMINT UNSIGNED" => "UINTEGER".to_string(),
85 "INT UNSIGNED" | "INTEGER UNSIGNED" => "UINTEGER".to_string(),
86 "BIGINT UNSIGNED" => "UBIGINT".to_string(),
87
88 "SERIAL" => "INTEGER".to_string(),
90 "BIGSERIAL" => "BIGINT".to_string(),
91 "SMALLSERIAL" => "SMALLINT".to_string(),
92
93 "FLOAT" | "FLOAT4" | "REAL" => "FLOAT".to_string(),
95 "DOUBLE" | "DOUBLE PRECISION" | "FLOAT8" => "DOUBLE".to_string(),
96
97 "DECIMAL" | "NUMERIC" | "DEC" | "FIXED" => "DECIMAL".to_string(),
99 "MONEY" => "DECIMAL(19,4)".to_string(),
100
101 "CHAR" => "VARCHAR".to_string(),
103 "VARCHAR" | "CHARACTER VARYING" => "VARCHAR".to_string(),
104 "TINYTEXT" => "VARCHAR(255)".to_string(),
105 "TEXT" | "MEDIUMTEXT" | "LONGTEXT" => "TEXT".to_string(),
106
107 "BINARY" | "VARBINARY" => "BLOB".to_string(),
109 "TINYBLOB" | "BLOB" | "MEDIUMBLOB" | "LONGBLOB" => "BLOB".to_string(),
110 "BYTEA" => "BLOB".to_string(),
111
112 "DATE" => "DATE".to_string(),
114 "TIME" | "TIME WITHOUT TIME ZONE" => "TIME".to_string(),
115 "TIMETZ" | "TIME WITH TIME ZONE" => "TIMETZ".to_string(),
116 "DATETIME" => "TIMESTAMP".to_string(),
117 "TIMESTAMP" | "TIMESTAMP WITHOUT TIME ZONE" => "TIMESTAMP".to_string(),
118 "TIMESTAMPTZ" | "TIMESTAMP WITH TIME ZONE" => "TIMESTAMPTZ".to_string(),
119 "YEAR" => "SMALLINT".to_string(),
120 "INTERVAL" => "INTERVAL".to_string(),
121
122 "JSON" | "JSONB" => "JSON".to_string(),
124
125 "UUID" => "UUID".to_string(),
127
128 "BIT" => "BOOLEAN".to_string(),
130
131 "NCHAR" => "VARCHAR".to_string(),
133 "NVARCHAR" => "VARCHAR".to_string(),
134 "NTEXT" => "TEXT".to_string(),
135 "DATETIME2" => "TIMESTAMP".to_string(),
136 "SMALLDATETIME" => "TIMESTAMP".to_string(),
137 "DATETIMEOFFSET" => "TIMESTAMPTZ".to_string(),
138 "UNIQUEIDENTIFIER" => "UUID".to_string(),
139 "IMAGE" => "BLOB".to_string(),
140 "ROWVERSION" => "BLOB".to_string(),
141 "SMALLMONEY" => "DECIMAL(10,4)".to_string(),
142 "SQL_VARIANT" => "VARCHAR".to_string(),
143 "XML" => "TEXT".to_string(),
144 "HIERARCHYID" => "VARCHAR".to_string(),
145 "GEOGRAPHY" => "BLOB".to_string(),
146 "GEOMETRY" => "BLOB".to_string(),
147
148 _ => Self::convert_parameterized(trimmed),
149 }
150 }
151
152 fn convert_parameterized(type_str: &str) -> String {
154 static RE_PARAMETERIZED: Lazy<Regex> =
156 Lazy::new(|| Regex::new(r"^(\w+)\s*\(([^)]+)\)(.*)$").unwrap());
157
158 if let Some(caps) = RE_PARAMETERIZED.captures(type_str) {
159 let base_type = caps.get(1).map(|m| m.as_str()).unwrap_or("");
160 let params = caps.get(2).map(|m| m.as_str()).unwrap_or("");
161 let suffix = caps.get(3).map(|m| m.as_str()).unwrap_or("").trim();
162
163 let converted_base = match base_type {
164 "TINYINT" => {
166 if params == "1" {
167 return "BOOLEAN".to_string();
168 }
169 if suffix.contains("UNSIGNED") {
170 "UTINYINT"
171 } else {
172 "TINYINT"
173 }
174 }
175 "SMALLINT" => {
176 if suffix.contains("UNSIGNED") {
177 "USMALLINT"
178 } else {
179 "SMALLINT"
180 }
181 }
182 "MEDIUMINT" => {
183 if suffix.contains("UNSIGNED") {
184 "UINTEGER"
185 } else {
186 "INTEGER"
187 }
188 }
189 "INT" | "INTEGER" => {
190 if suffix.contains("UNSIGNED") {
191 "UINTEGER"
192 } else {
193 "INTEGER"
194 }
195 }
196 "BIGINT" => {
197 if suffix.contains("UNSIGNED") {
198 "UBIGINT"
199 } else {
200 "BIGINT"
201 }
202 }
203
204 "CHAR" | "CHARACTER" => {
206 return format!("VARCHAR({})", params);
207 }
208 "VARCHAR" | "CHARACTER VARYING" => {
209 if params.to_uppercase() == "MAX" {
211 return "TEXT".to_string();
212 }
213 return format!("VARCHAR({})", params);
214 }
215 "BINARY" | "VARBINARY" => {
216 if params.to_uppercase() == "MAX" {
218 return "BLOB".to_string();
219 }
220 return "BLOB".to_string();
221 }
222
223 "DECIMAL" | "NUMERIC" | "DEC" | "FIXED" => {
225 return format!("DECIMAL({})", params);
226 }
227
228 "FLOAT" => {
230 if let Ok(precision) = params.parse::<u32>() {
231 if precision <= 24 {
232 return "FLOAT".to_string();
233 } else {
234 return "DOUBLE".to_string();
235 }
236 }
237 return "FLOAT".to_string();
238 }
239 "DOUBLE" => return "DOUBLE".to_string(),
240
241 "TIME" => return "TIME".to_string(),
243 "TIMESTAMP" | "DATETIME" => return "TIMESTAMP".to_string(),
244
245 "BIT" => {
247 if params == "1" {
248 return "BOOLEAN".to_string();
249 }
250 return "BITSTRING".to_string();
251 }
252
253 "ENUM" => return "VARCHAR".to_string(),
255
256 "SET" => return "VARCHAR".to_string(),
258
259 "NVARCHAR" => {
261 if params.to_uppercase() == "MAX" {
262 return "TEXT".to_string();
263 }
264 return format!("VARCHAR({})", params);
265 }
266 "NCHAR" => return format!("VARCHAR({})", params),
267
268 "DATETIME2" => return "TIMESTAMP".to_string(),
270
271 _ => return type_str.to_string(),
272 };
273
274 return converted_base.to_string();
275 }
276
277 type_str.to_string()
279 }
280
281 pub fn convert_column_def(column_def: &str) -> String {
283 let mut result = column_def.to_string();
285
286 static RE_TYPE: Lazy<Regex> = Lazy::new(|| {
288 Regex::new(r"(?i)^(\s*`?[^`\s]+`?\s+)([A-Z][A-Z0-9_\s(),']+?)(\s+|$)").unwrap()
289 });
290
291 if let Some(caps) = RE_TYPE.captures(&result) {
292 if let Some(type_match) = caps.get(2) {
293 let original_type = type_match.as_str().trim();
294 let converted_type = Self::convert(original_type);
295 result = result.replacen(original_type, &converted_type, 1);
296 }
297 }
298
299 result = result.replace("AUTO_INCREMENT", "");
301 result = result.replace("auto_increment", "");
302
303 result = result.replace(" UNSIGNED", "");
305 result = result.replace(" unsigned", "");
306
307 result = result.replace(" ZEROFILL", "");
309 result = result.replace(" zerofill", "");
310
311 static RE_ON_UPDATE: Lazy<Regex> =
313 Lazy::new(|| Regex::new(r"(?i)\s*ON\s+UPDATE\s+CURRENT_TIMESTAMP").unwrap());
314 result = RE_ON_UPDATE.replace_all(&result, "").to_string();
315
316 static RE_CHARSET: Lazy<Regex> =
318 Lazy::new(|| Regex::new(r"(?i)\s*CHARACTER\s+SET\s+\w+").unwrap());
319 result = RE_CHARSET.replace_all(&result, "").to_string();
320
321 static RE_COLLATE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\s*COLLATE\s+\w+").unwrap());
323 result = RE_COLLATE.replace_all(&result, "").to_string();
324
325 static RE_SPACES: Lazy<Regex> = Lazy::new(|| Regex::new(r"\s+").unwrap());
327 result = RE_SPACES.replace_all(&result, " ").trim().to_string();
328
329 result
330 }
331}
332
333#[cfg(test)]
334mod tests {
335 use super::*;
336
337 #[test]
338 fn test_basic_type_conversion() {
339 assert_eq!(TypeConverter::convert("INT"), "INTEGER");
340 assert_eq!(TypeConverter::convert("BIGINT"), "BIGINT");
341 assert_eq!(TypeConverter::convert("VARCHAR"), "VARCHAR");
342 assert_eq!(TypeConverter::convert("TEXT"), "TEXT");
343 assert_eq!(TypeConverter::convert("DATETIME"), "TIMESTAMP");
344 assert_eq!(TypeConverter::convert("BYTEA"), "BLOB");
345 }
346
347 #[test]
348 fn test_parameterized_types() {
349 assert_eq!(TypeConverter::convert("VARCHAR(255)"), "VARCHAR(255)");
350 assert_eq!(TypeConverter::convert("DECIMAL(10,2)"), "DECIMAL(10,2)");
351 assert_eq!(TypeConverter::convert("CHAR(1)"), "VARCHAR(1)");
352 assert_eq!(TypeConverter::convert("TINYINT(1)"), "BOOLEAN");
353 }
354
355 #[test]
356 fn test_unsigned_types() {
357 assert_eq!(TypeConverter::convert("INT UNSIGNED"), "UINTEGER");
358 assert_eq!(TypeConverter::convert("BIGINT UNSIGNED"), "UBIGINT");
359 assert_eq!(TypeConverter::convert("TINYINT(3) UNSIGNED"), "UTINYINT");
360 }
361
362 #[test]
363 fn test_mysql_specific() {
364 assert_eq!(TypeConverter::convert("MEDIUMINT"), "INTEGER");
365 assert_eq!(TypeConverter::convert("LONGTEXT"), "TEXT");
366 assert_eq!(TypeConverter::convert("MEDIUMBLOB"), "BLOB");
367 assert_eq!(TypeConverter::convert("YEAR"), "SMALLINT");
368 }
369
370 #[test]
371 fn test_postgres_specific() {
372 assert_eq!(TypeConverter::convert("SERIAL"), "INTEGER");
373 assert_eq!(TypeConverter::convert("BIGSERIAL"), "BIGINT");
374 assert_eq!(TypeConverter::convert("JSONB"), "JSON");
375 assert_eq!(TypeConverter::convert("UUID"), "UUID");
376 }
377
378 #[test]
379 fn test_enum_set() {
380 assert_eq!(TypeConverter::convert("ENUM('a','b','c')"), "VARCHAR");
381 assert_eq!(TypeConverter::convert("SET('x','y')"), "VARCHAR");
382 }
383}