1use once_cell::sync::Lazy;
4use regex::Regex;
5
6#[derive(Debug, Clone, PartialEq)]
8#[allow(dead_code)]
9pub enum DuckDBType {
10 Boolean,
11 TinyInt,
12 SmallInt,
13 Integer,
14 BigInt,
15 Float,
16 Double,
17 Decimal(Option<u8>, Option<u8>),
18 Varchar(Option<u32>),
19 Text,
20 Blob,
21 Date,
22 Time,
23 Timestamp,
24 Interval,
25 Uuid,
26 Json,
27}
28
29impl std::fmt::Display for DuckDBType {
30 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31 match self {
32 DuckDBType::Boolean => write!(f, "BOOLEAN"),
33 DuckDBType::TinyInt => write!(f, "TINYINT"),
34 DuckDBType::SmallInt => write!(f, "SMALLINT"),
35 DuckDBType::Integer => write!(f, "INTEGER"),
36 DuckDBType::BigInt => write!(f, "BIGINT"),
37 DuckDBType::Float => write!(f, "FLOAT"),
38 DuckDBType::Double => write!(f, "DOUBLE"),
39 DuckDBType::Decimal(p, s) => match (p, s) {
40 (Some(p), Some(s)) => write!(f, "DECIMAL({}, {})", p, s),
41 (Some(p), None) => write!(f, "DECIMAL({})", p),
42 _ => write!(f, "DECIMAL"),
43 },
44 DuckDBType::Varchar(len) => match len {
45 Some(n) => write!(f, "VARCHAR({})", n),
46 None => write!(f, "VARCHAR"),
47 },
48 DuckDBType::Text => write!(f, "TEXT"),
49 DuckDBType::Blob => write!(f, "BLOB"),
50 DuckDBType::Date => write!(f, "DATE"),
51 DuckDBType::Time => write!(f, "TIME"),
52 DuckDBType::Timestamp => write!(f, "TIMESTAMP"),
53 DuckDBType::Interval => write!(f, "INTERVAL"),
54 DuckDBType::Uuid => write!(f, "UUID"),
55 DuckDBType::Json => write!(f, "JSON"),
56 }
57 }
58}
59
60pub struct TypeConverter;
62
63impl TypeConverter {
64 pub fn convert(type_str: &str) -> String {
66 let upper = type_str.to_uppercase();
67 let trimmed = upper.trim();
68
69 match trimmed {
71 "BOOL" | "BOOLEAN" => "BOOLEAN".to_string(),
73 "TINYINT(1)" => "BOOLEAN".to_string(),
74
75 "TINYINT" => "TINYINT".to_string(),
77 "SMALLINT" => "SMALLINT".to_string(),
78 "MEDIUMINT" => "INTEGER".to_string(),
79 "INT" | "INTEGER" => "INTEGER".to_string(),
80 "BIGINT" => "BIGINT".to_string(),
81
82 "TINYINT UNSIGNED" => "UTINYINT".to_string(),
84 "SMALLINT UNSIGNED" => "USMALLINT".to_string(),
85 "MEDIUMINT UNSIGNED" => "UINTEGER".to_string(),
86 "INT UNSIGNED" | "INTEGER UNSIGNED" => "UINTEGER".to_string(),
87 "BIGINT UNSIGNED" => "UBIGINT".to_string(),
88
89 "SERIAL" => "INTEGER".to_string(),
91 "BIGSERIAL" => "BIGINT".to_string(),
92 "SMALLSERIAL" => "SMALLINT".to_string(),
93
94 "FLOAT" | "FLOAT4" | "REAL" => "FLOAT".to_string(),
96 "DOUBLE" | "DOUBLE PRECISION" | "FLOAT8" => "DOUBLE".to_string(),
97
98 "DECIMAL" | "NUMERIC" | "DEC" | "FIXED" => "DECIMAL".to_string(),
100 "MONEY" => "DECIMAL(19,4)".to_string(),
101
102 "CHAR" => "VARCHAR".to_string(),
104 "VARCHAR" | "CHARACTER VARYING" => "VARCHAR".to_string(),
105 "TINYTEXT" => "VARCHAR(255)".to_string(),
106 "TEXT" | "MEDIUMTEXT" | "LONGTEXT" => "TEXT".to_string(),
107
108 "BINARY" | "VARBINARY" => "BLOB".to_string(),
110 "TINYBLOB" | "BLOB" | "MEDIUMBLOB" | "LONGBLOB" => "BLOB".to_string(),
111 "BYTEA" => "BLOB".to_string(),
112
113 "DATE" => "DATE".to_string(),
115 "TIME" | "TIME WITHOUT TIME ZONE" => "TIME".to_string(),
116 "TIMETZ" | "TIME WITH TIME ZONE" => "TIMETZ".to_string(),
117 "DATETIME" => "TIMESTAMP".to_string(),
118 "TIMESTAMP" | "TIMESTAMP WITHOUT TIME ZONE" => "TIMESTAMP".to_string(),
119 "TIMESTAMPTZ" | "TIMESTAMP WITH TIME ZONE" => "TIMESTAMPTZ".to_string(),
120 "YEAR" => "SMALLINT".to_string(),
121 "INTERVAL" => "INTERVAL".to_string(),
122
123 "JSON" | "JSONB" => "JSON".to_string(),
125
126 "UUID" => "UUID".to_string(),
128
129 "BIT" => "BOOLEAN".to_string(),
131
132 "NCHAR" => "VARCHAR".to_string(),
134 "NVARCHAR" => "VARCHAR".to_string(),
135 "NTEXT" => "TEXT".to_string(),
136 "DATETIME2" => "TIMESTAMP".to_string(),
137 "SMALLDATETIME" => "TIMESTAMP".to_string(),
138 "DATETIMEOFFSET" => "TIMESTAMPTZ".to_string(),
139 "UNIQUEIDENTIFIER" => "UUID".to_string(),
140 "IMAGE" => "BLOB".to_string(),
141 "ROWVERSION" => "BLOB".to_string(),
142 "SMALLMONEY" => "DECIMAL(10,4)".to_string(),
143 "SQL_VARIANT" => "VARCHAR".to_string(),
144 "XML" => "TEXT".to_string(),
145 "HIERARCHYID" => "VARCHAR".to_string(),
146 "GEOGRAPHY" => "BLOB".to_string(),
147 "GEOMETRY" => "BLOB".to_string(),
148
149 _ => Self::convert_parameterized(trimmed),
150 }
151 }
152
153 fn convert_parameterized(type_str: &str) -> String {
155 static RE_PARAMETERIZED: Lazy<Regex> =
157 Lazy::new(|| Regex::new(r"^(\w+)\s*\(([^)]+)\)(.*)$").unwrap());
158
159 if let Some(caps) = RE_PARAMETERIZED.captures(type_str) {
160 let base_type = caps.get(1).map(|m| m.as_str()).unwrap_or("");
161 let params = caps.get(2).map(|m| m.as_str()).unwrap_or("");
162 let suffix = caps.get(3).map(|m| m.as_str()).unwrap_or("").trim();
163
164 let converted_base = match base_type {
165 "TINYINT" => {
167 if params == "1" {
168 return "BOOLEAN".to_string();
169 }
170 if suffix.contains("UNSIGNED") {
171 "UTINYINT"
172 } else {
173 "TINYINT"
174 }
175 }
176 "SMALLINT" => {
177 if suffix.contains("UNSIGNED") {
178 "USMALLINT"
179 } else {
180 "SMALLINT"
181 }
182 }
183 "MEDIUMINT" => {
184 if suffix.contains("UNSIGNED") {
185 "UINTEGER"
186 } else {
187 "INTEGER"
188 }
189 }
190 "INT" | "INTEGER" => {
191 if suffix.contains("UNSIGNED") {
192 "UINTEGER"
193 } else {
194 "INTEGER"
195 }
196 }
197 "BIGINT" => {
198 if suffix.contains("UNSIGNED") {
199 "UBIGINT"
200 } else {
201 "BIGINT"
202 }
203 }
204
205 "CHAR" | "CHARACTER" => {
207 return format!("VARCHAR({})", params);
208 }
209 "VARCHAR" | "CHARACTER VARYING" => {
210 if params.to_uppercase() == "MAX" {
212 return "TEXT".to_string();
213 }
214 return format!("VARCHAR({})", params);
215 }
216 "BINARY" | "VARBINARY" => {
217 if params.to_uppercase() == "MAX" {
219 return "BLOB".to_string();
220 }
221 return "BLOB".to_string();
222 }
223
224 "DECIMAL" | "NUMERIC" | "DEC" | "FIXED" => {
226 return format!("DECIMAL({})", params);
227 }
228
229 "FLOAT" => {
231 if let Ok(precision) = params.parse::<u32>() {
232 if precision <= 24 {
233 return "FLOAT".to_string();
234 } else {
235 return "DOUBLE".to_string();
236 }
237 }
238 return "FLOAT".to_string();
239 }
240 "DOUBLE" => return "DOUBLE".to_string(),
241
242 "TIME" => return "TIME".to_string(),
244 "TIMESTAMP" | "DATETIME" => return "TIMESTAMP".to_string(),
245
246 "BIT" => {
248 if params == "1" {
249 return "BOOLEAN".to_string();
250 }
251 return "BITSTRING".to_string();
252 }
253
254 "ENUM" => return "VARCHAR".to_string(),
256
257 "SET" => return "VARCHAR".to_string(),
259
260 "NVARCHAR" => {
262 if params.to_uppercase() == "MAX" {
263 return "TEXT".to_string();
264 }
265 return format!("VARCHAR({})", params);
266 }
267 "NCHAR" => return format!("VARCHAR({})", params),
268
269 "DATETIME2" => return "TIMESTAMP".to_string(),
271
272 _ => return type_str.to_string(),
273 };
274
275 return converted_base.to_string();
276 }
277
278 type_str.to_string()
280 }
281}
282
283#[cfg(test)]
284mod tests {
285 use super::*;
286
287 #[test]
288 fn test_basic_type_conversion() {
289 assert_eq!(TypeConverter::convert("INT"), "INTEGER");
290 assert_eq!(TypeConverter::convert("BIGINT"), "BIGINT");
291 assert_eq!(TypeConverter::convert("VARCHAR"), "VARCHAR");
292 assert_eq!(TypeConverter::convert("TEXT"), "TEXT");
293 assert_eq!(TypeConverter::convert("DATETIME"), "TIMESTAMP");
294 assert_eq!(TypeConverter::convert("BYTEA"), "BLOB");
295 }
296
297 #[test]
298 fn test_parameterized_types() {
299 assert_eq!(TypeConverter::convert("VARCHAR(255)"), "VARCHAR(255)");
300 assert_eq!(TypeConverter::convert("DECIMAL(10,2)"), "DECIMAL(10,2)");
301 assert_eq!(TypeConverter::convert("CHAR(1)"), "VARCHAR(1)");
302 assert_eq!(TypeConverter::convert("TINYINT(1)"), "BOOLEAN");
303 }
304
305 #[test]
306 fn test_unsigned_types() {
307 assert_eq!(TypeConverter::convert("INT UNSIGNED"), "UINTEGER");
308 assert_eq!(TypeConverter::convert("BIGINT UNSIGNED"), "UBIGINT");
309 assert_eq!(TypeConverter::convert("TINYINT(3) UNSIGNED"), "UTINYINT");
310 }
311
312 #[test]
313 fn test_mysql_specific() {
314 assert_eq!(TypeConverter::convert("MEDIUMINT"), "INTEGER");
315 assert_eq!(TypeConverter::convert("LONGTEXT"), "TEXT");
316 assert_eq!(TypeConverter::convert("MEDIUMBLOB"), "BLOB");
317 assert_eq!(TypeConverter::convert("YEAR"), "SMALLINT");
318 }
319
320 #[test]
321 fn test_postgres_specific() {
322 assert_eq!(TypeConverter::convert("SERIAL"), "INTEGER");
323 assert_eq!(TypeConverter::convert("BIGSERIAL"), "BIGINT");
324 assert_eq!(TypeConverter::convert("JSONB"), "JSON");
325 assert_eq!(TypeConverter::convert("UUID"), "UUID");
326 }
327
328 #[test]
329 fn test_enum_set() {
330 assert_eq!(TypeConverter::convert("ENUM('a','b','c')"), "VARCHAR");
331 assert_eq!(TypeConverter::convert("SET('x','y')"), "VARCHAR");
332 }
333}