sqlparser/ast/data_type.rs
1// Licensed under the Apache License, Version 2.0 (the "License");
2// you may not use this file except in compliance with the License.
3// You may obtain a copy of the License at
4//
5// http://www.apache.org/licenses/LICENSE-2.0
6//
7// Unless required by applicable law or agreed to in writing, software
8// distributed under the License is distributed on an "AS IS" BASIS,
9// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10// See the License for the specific language governing permissions and
11// limitations under the License.
12
13#[cfg(not(feature = "std"))]
14use alloc::{boxed::Box, format, string::String, vec::Vec};
15use core::fmt;
16
17#[cfg(feature = "serde")]
18use serde::{Deserialize, Serialize};
19
20#[cfg(feature = "visitor")]
21use sqlparser_derive::{Visit, VisitMut};
22
23use crate::ast::ObjectName;
24
25use super::value::escape_single_quote_string;
26
27/// SQL data types
28#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
29#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
30#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
31pub enum DataType {
32 /// Fixed-length character type e.g. CHARACTER(10)
33 Character(Option<CharacterLength>),
34 /// Fixed-length char type e.g. CHAR(10)
35 Char(Option<CharacterLength>),
36 /// Character varying type e.g. CHARACTER VARYING(10)
37 CharacterVarying(Option<CharacterLength>),
38 /// Char varying type e.g. CHAR VARYING(10)
39 CharVarying(Option<CharacterLength>),
40 /// Variable-length character type e.g. VARCHAR(10)
41 Varchar(Option<CharacterLength>),
42 /// Variable-length character type e.g. NVARCHAR(10)
43 Nvarchar(Option<u64>),
44 /// Uuid type
45 Uuid,
46 /// Large character object with optional length e.g. CHARACTER LARGE OBJECT, CHARACTER LARGE OBJECT(1000), [standard]
47 ///
48 /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-large-object-type
49 CharacterLargeObject(Option<u64>),
50 /// Large character object with optional length e.g. CHAR LARGE OBJECT, CHAR LARGE OBJECT(1000), [standard]
51 ///
52 /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-large-object-type
53 CharLargeObject(Option<u64>),
54 /// Large character object with optional length e.g. CLOB, CLOB(1000), [standard]
55 ///
56 /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-large-object-type
57 /// [Oracle]: https://docs.oracle.com/javadb/10.10.1.2/ref/rrefclob.html
58 Clob(Option<u64>),
59 /// Fixed-length binary type with optional length e.g. [standard], [MS SQL Server]
60 ///
61 /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-string-type
62 /// [MS SQL Server]: https://learn.microsoft.com/pt-br/sql/t-sql/data-types/binary-and-varbinary-transact-sql?view=sql-server-ver16
63 Binary(Option<u64>),
64 /// Variable-length binary with optional length type e.g. [standard], [MS SQL Server]
65 ///
66 /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-string-type
67 /// [MS SQL Server]: https://learn.microsoft.com/pt-br/sql/t-sql/data-types/binary-and-varbinary-transact-sql?view=sql-server-ver16
68 Varbinary(Option<u64>),
69 /// Large binary object with optional length e.g. BLOB, BLOB(1000), [standard], [Oracle]
70 ///
71 /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-large-object-string-type
72 /// [Oracle]: https://docs.oracle.com/javadb/10.8.3.0/ref/rrefblob.html
73 Blob(Option<u64>),
74 /// Numeric type with optional precision and scale e.g. NUMERIC(10,2), [standard][1]
75 ///
76 /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type
77 Numeric(ExactNumberInfo),
78 /// Decimal type with optional precision and scale e.g. DECIMAL(10,2), [standard][1]
79 ///
80 /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type
81 Decimal(ExactNumberInfo),
82 /// [BigNumeric] type used in BigQuery
83 ///
84 /// [BigNumeric]: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#bignumeric_literals
85 BigNumeric(ExactNumberInfo),
86 /// This is alias for `BigNumeric` type used in BigQuery
87 ///
88 /// [BigDecimal]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
89 BigDecimal(ExactNumberInfo),
90 /// Dec type with optional precision and scale e.g. DEC(10,2), [standard][1]
91 ///
92 /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type
93 Dec(ExactNumberInfo),
94 /// Floating point with optional precision e.g. FLOAT(8)
95 Float(Option<u64>),
96 /// Tiny integer with optional display width e.g. TINYINT or TINYINT(3)
97 TinyInt(Option<u64>),
98 /// Unsigned tiny integer with optional display width e.g. TINYINT UNSIGNED or TINYINT(3) UNSIGNED
99 UnsignedTinyInt(Option<u64>),
100 /// Small integer with optional display width e.g. SMALLINT or SMALLINT(5)
101 SmallInt(Option<u64>),
102 /// Unsigned small integer with optional display width e.g. SMALLINT UNSIGNED or SMALLINT(5) UNSIGNED
103 UnsignedSmallInt(Option<u64>),
104 /// MySQL medium integer ([1]) with optional display width e.g. MEDIUMINT or MEDIUMINT(5)
105 ///
106 /// [1]: https://dev.mysql.com/doc/refman/8.0/en/integer-types.html
107 MediumInt(Option<u64>),
108 /// Unsigned medium integer ([1]) with optional display width e.g. MEDIUMINT UNSIGNED or MEDIUMINT(5) UNSIGNED
109 ///
110 /// [1]: https://dev.mysql.com/doc/refman/8.0/en/integer-types.html
111 UnsignedMediumInt(Option<u64>),
112 /// Integer with optional display width e.g. INT or INT(11)
113 Int(Option<u64>),
114 /// Integer with optional display width e.g. INTEGER or INTEGER(11)
115 Integer(Option<u64>),
116 /// Unsigned integer with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED
117 UnsignedInt(Option<u64>),
118 /// Unsigned integer with optional display width e.g. INTGER UNSIGNED or INTEGER(11) UNSIGNED
119 UnsignedInteger(Option<u64>),
120 /// Big integer with optional display width e.g. BIGINT or BIGINT(20)
121 BigInt(Option<u64>),
122 /// Unsigned big integer with optional display width e.g. BIGINT UNSIGNED or BIGINT(20) UNSIGNED
123 UnsignedBigInt(Option<u64>),
124 /// Floating point e.g. REAL
125 Real,
126 /// Double
127 Double,
128 /// Double PRECISION e.g. [standard], [postgresql]
129 ///
130 /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#approximate-numeric-type
131 /// [postgresql]: https://www.postgresql.org/docs/current/datatype-numeric.html
132 DoublePrecision,
133 /// Boolean
134 Boolean,
135 /// Date
136 Date,
137 /// Time with optional time precision and time zone information e.g. [standard][1].
138 ///
139 /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type
140 Time(Option<u64>, TimezoneInfo),
141 /// Datetime with optional time precision e.g. [MySQL][1].
142 ///
143 /// [1]: https://dev.mysql.com/doc/refman/8.0/en/datetime.html
144 Datetime(Option<u64>),
145 /// Timestamp with optional time precision and time zone information e.g. [standard][1].
146 ///
147 /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type
148 Timestamp(Option<u64>, TimezoneInfo),
149 /// Interval
150 Interval,
151 /// JSON type used in BigQuery
152 JSON,
153 /// Regclass used in postgresql serial
154 Regclass,
155 /// Text
156 Text,
157 /// String
158 String,
159 /// Bytea
160 Bytea,
161 /// Custom type such as enums
162 Custom(ObjectName, Vec<String>),
163 /// Arrays
164 Array(Option<Box<DataType>>),
165 /// Enums
166 Enum(Vec<String>),
167 /// Set
168 Set(Vec<String>),
169}
170
171impl fmt::Display for DataType {
172 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
173 match self {
174 DataType::Character(size) => format_character_string_type(f, "CHARACTER", size),
175 DataType::Char(size) => format_character_string_type(f, "CHAR", size),
176 DataType::CharacterVarying(size) => {
177 format_character_string_type(f, "CHARACTER VARYING", size)
178 }
179
180 DataType::CharVarying(size) => format_character_string_type(f, "CHAR VARYING", size),
181 DataType::Varchar(size) => format_character_string_type(f, "VARCHAR", size),
182 DataType::Nvarchar(size) => {
183 format_type_with_optional_length(f, "NVARCHAR", size, false)
184 }
185 DataType::Uuid => write!(f, "UUID"),
186 DataType::CharacterLargeObject(size) => {
187 format_type_with_optional_length(f, "CHARACTER LARGE OBJECT", size, false)
188 }
189 DataType::CharLargeObject(size) => {
190 format_type_with_optional_length(f, "CHAR LARGE OBJECT", size, false)
191 }
192 DataType::Clob(size) => format_type_with_optional_length(f, "CLOB", size, false),
193 DataType::Binary(size) => format_type_with_optional_length(f, "BINARY", size, false),
194 DataType::Varbinary(size) => {
195 format_type_with_optional_length(f, "VARBINARY", size, false)
196 }
197 DataType::Blob(size) => format_type_with_optional_length(f, "BLOB", size, false),
198 DataType::Numeric(info) => {
199 write!(f, "NUMERIC{info}")
200 }
201 DataType::Decimal(info) => {
202 write!(f, "DECIMAL{info}")
203 }
204 DataType::Dec(info) => {
205 write!(f, "DEC{info}")
206 }
207 DataType::BigNumeric(info) => write!(f, "BIGNUMERIC{info}"),
208 DataType::BigDecimal(info) => write!(f, "BIGDECIMAL{info}"),
209 DataType::Float(size) => format_type_with_optional_length(f, "FLOAT", size, false),
210 DataType::TinyInt(zerofill) => {
211 format_type_with_optional_length(f, "TINYINT", zerofill, false)
212 }
213 DataType::UnsignedTinyInt(zerofill) => {
214 format_type_with_optional_length(f, "TINYINT", zerofill, true)
215 }
216 DataType::SmallInt(zerofill) => {
217 format_type_with_optional_length(f, "SMALLINT", zerofill, false)
218 }
219 DataType::UnsignedSmallInt(zerofill) => {
220 format_type_with_optional_length(f, "SMALLINT", zerofill, true)
221 }
222 DataType::MediumInt(zerofill) => {
223 format_type_with_optional_length(f, "MEDIUMINT", zerofill, false)
224 }
225 DataType::UnsignedMediumInt(zerofill) => {
226 format_type_with_optional_length(f, "MEDIUMINT", zerofill, true)
227 }
228 DataType::Int(zerofill) => format_type_with_optional_length(f, "INT", zerofill, false),
229 DataType::UnsignedInt(zerofill) => {
230 format_type_with_optional_length(f, "INT", zerofill, true)
231 }
232 DataType::Integer(zerofill) => {
233 format_type_with_optional_length(f, "INTEGER", zerofill, false)
234 }
235 DataType::UnsignedInteger(zerofill) => {
236 format_type_with_optional_length(f, "INTEGER", zerofill, true)
237 }
238 DataType::BigInt(zerofill) => {
239 format_type_with_optional_length(f, "BIGINT", zerofill, false)
240 }
241 DataType::UnsignedBigInt(zerofill) => {
242 format_type_with_optional_length(f, "BIGINT", zerofill, true)
243 }
244 DataType::Real => write!(f, "REAL"),
245 DataType::Double => write!(f, "DOUBLE"),
246 DataType::DoublePrecision => write!(f, "DOUBLE PRECISION"),
247 DataType::Boolean => write!(f, "BOOLEAN"),
248 DataType::Date => write!(f, "DATE"),
249 DataType::Time(precision, timezone_info) => {
250 format_datetime_precision_and_tz(f, "TIME", precision, timezone_info)
251 }
252 DataType::Datetime(precision) => {
253 format_type_with_optional_length(f, "DATETIME", precision, false)
254 }
255 DataType::Timestamp(precision, timezone_info) => {
256 format_datetime_precision_and_tz(f, "TIMESTAMP", precision, timezone_info)
257 }
258 DataType::Interval => write!(f, "INTERVAL"),
259 DataType::JSON => write!(f, "JSON"),
260 DataType::Regclass => write!(f, "REGCLASS"),
261 DataType::Text => write!(f, "TEXT"),
262 DataType::String => write!(f, "STRING"),
263 DataType::Bytea => write!(f, "BYTEA"),
264 DataType::Array(ty) => {
265 if let Some(t) = &ty {
266 write!(f, "{t}[]")
267 } else {
268 write!(f, "ARRAY")
269 }
270 }
271 DataType::Custom(ty, modifiers) => {
272 if modifiers.is_empty() {
273 write!(f, "{ty}")
274 } else {
275 write!(f, "{}({})", ty, modifiers.join(", "))
276 }
277 }
278 DataType::Enum(vals) => {
279 write!(f, "ENUM(")?;
280 for (i, v) in vals.iter().enumerate() {
281 if i != 0 {
282 write!(f, ", ")?;
283 }
284 write!(f, "'{}'", escape_single_quote_string(v))?;
285 }
286 write!(f, ")")
287 }
288 DataType::Set(vals) => {
289 write!(f, "SET(")?;
290 for (i, v) in vals.iter().enumerate() {
291 if i != 0 {
292 write!(f, ", ")?;
293 }
294 write!(f, "'{}'", escape_single_quote_string(v))?;
295 }
296 write!(f, ")")
297 }
298 }
299 }
300}
301
302fn format_type_with_optional_length(
303 f: &mut fmt::Formatter,
304 sql_type: &'static str,
305 len: &Option<u64>,
306 unsigned: bool,
307) -> fmt::Result {
308 write!(f, "{sql_type}")?;
309 if let Some(len) = len {
310 write!(f, "({len})")?;
311 }
312 if unsigned {
313 write!(f, " UNSIGNED")?;
314 }
315 Ok(())
316}
317
318fn format_character_string_type(
319 f: &mut fmt::Formatter,
320 sql_type: &str,
321 size: &Option<CharacterLength>,
322) -> fmt::Result {
323 write!(f, "{sql_type}")?;
324 if let Some(size) = size {
325 write!(f, "({size})")?;
326 }
327 Ok(())
328}
329
330fn format_datetime_precision_and_tz(
331 f: &mut fmt::Formatter,
332 sql_type: &'static str,
333 len: &Option<u64>,
334 time_zone: &TimezoneInfo,
335) -> fmt::Result {
336 write!(f, "{sql_type}")?;
337 let len_fmt = len.as_ref().map(|l| format!("({l})")).unwrap_or_default();
338
339 match time_zone {
340 TimezoneInfo::Tz => {
341 write!(f, "{time_zone}{len_fmt}")?;
342 }
343 _ => {
344 write!(f, "{len_fmt}{time_zone}")?;
345 }
346 }
347
348 Ok(())
349}
350
351/// Timestamp and Time data types information about TimeZone formatting.
352///
353/// This is more related to a display information than real differences between each variant. To
354/// guarantee compatibility with the input query we must maintain its exact information.
355#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
356#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
357#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
358pub enum TimezoneInfo {
359 /// No information about time zone. E.g., TIMESTAMP
360 None,
361 /// Temporal type 'WITH TIME ZONE'. E.g., TIMESTAMP WITH TIME ZONE, [standard], [Oracle]
362 ///
363 /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type
364 /// [Oracle]: https://docs.oracle.com/en/database/oracle/oracle-database/12.2/nlspg/datetime-data-types-and-time-zone-support.html#GUID-3F1C388E-C651-43D5-ADBC-1A49E5C2CA05
365 WithTimeZone,
366 /// Temporal type 'WITHOUT TIME ZONE'. E.g., TIME WITHOUT TIME ZONE, [standard], [Postgresql]
367 ///
368 /// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type
369 /// [Postgresql]: https://www.postgresql.org/docs/current/datatype-datetime.html
370 WithoutTimeZone,
371 /// Postgresql specific `WITH TIME ZONE` formatting, for both TIME and TIMESTAMP. E.g., TIMETZ, [Postgresql]
372 ///
373 /// [Postgresql]: https://www.postgresql.org/docs/current/datatype-datetime.html
374 Tz,
375}
376
377impl fmt::Display for TimezoneInfo {
378 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
379 match self {
380 TimezoneInfo::None => {
381 write!(f, "")
382 }
383 TimezoneInfo::WithTimeZone => {
384 write!(f, " WITH TIME ZONE")
385 }
386 TimezoneInfo::WithoutTimeZone => {
387 write!(f, " WITHOUT TIME ZONE")
388 }
389 TimezoneInfo::Tz => {
390 // TZ is the only one that is displayed BEFORE the precision, so the datatype display
391 // must be aware of that. Check <https://www.postgresql.org/docs/14/datatype-datetime.html>
392 // for more information
393 write!(f, "TZ")
394 }
395 }
396 }
397}
398
399/// Additional information for `NUMERIC`, `DECIMAL`, and `DEC` data types
400/// following the 2016 [standard].
401///
402/// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type
403#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
404#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
405#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
406pub enum ExactNumberInfo {
407 /// No additional information e.g. `DECIMAL`
408 None,
409 /// Only precision information e.g. `DECIMAL(10)`
410 Precision(u64),
411 /// Precision and scale information e.g. `DECIMAL(10,2)`
412 PrecisionAndScale(u64, u64),
413}
414
415impl fmt::Display for ExactNumberInfo {
416 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
417 match self {
418 ExactNumberInfo::None => {
419 write!(f, "")
420 }
421 ExactNumberInfo::Precision(p) => {
422 write!(f, "({p})")
423 }
424 ExactNumberInfo::PrecisionAndScale(p, s) => {
425 write!(f, "({p},{s})")
426 }
427 }
428 }
429}
430
431/// Information about [character length][1], including length and possibly unit.
432///
433/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-length
434#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
435#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
436#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
437pub struct CharacterLength {
438 /// Default (if VARYING) or maximum (if not VARYING) length
439 pub length: u64,
440 /// Optional unit. If not informed, the ANSI handles it as CHARACTERS implicitly
441 pub unit: Option<CharLengthUnits>,
442}
443
444impl fmt::Display for CharacterLength {
445 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
446 write!(f, "{}", self.length)?;
447 if let Some(unit) = &self.unit {
448 write!(f, " {unit}")?;
449 }
450 Ok(())
451 }
452}
453
454/// Possible units for characters, initially based on 2016 ANSI [standard][1].
455///
456/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#char-length-units
457#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
458#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
459#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
460pub enum CharLengthUnits {
461 /// CHARACTERS unit
462 Characters,
463 /// OCTETS unit
464 Octets,
465}
466
467impl fmt::Display for CharLengthUnits {
468 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
469 match self {
470 Self::Characters => {
471 write!(f, "CHARACTERS")
472 }
473 Self::Octets => {
474 write!(f, "OCTETS")
475 }
476 }
477 }
478}