sql_parse/
data_type.rs

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
12
13use alloc::{boxed::Box, vec::Vec};
14
15use crate::{
16    expression::{parse_expression, Expression},
17    keywords::Keyword,
18    lexer::Token,
19    parser::{ParseError, Parser},
20    span::OptSpanned,
21    Identifier, SString, Span, Spanned,
22};
23
/// A property on a datatype
///
/// Properties are the modifiers that may follow the type itself in a column
/// definition. Keyword-only properties store the span of their keyword(s);
/// properties with an argument store the parsed argument.
#[derive(Debug, Clone)]
pub enum DataTypeProperty<'a> {
    /// `SIGNED`
    Signed(Span),
    /// `UNSIGNED`
    Unsigned(Span),
    /// `ZEROFILL`
    Zerofill(Span),
    /// `NULL`
    Null(Span),
    /// `NOT NULL`; the span runs from the start of `NOT` to the end of `NULL`
    NotNull(Span),
    /// `DEFAULT <expression>`; only the expression is stored
    Default(Box<Expression<'a>>),
    /// `COMMENT <string>`; only the string is stored
    Comment(SString<'a>),
    /// Character set name, as in `CHARACTER SET <identifier>`
    Charset(Identifier<'a>),
    /// Collation name, as in `COLLATE <identifier>`
    Collate(Identifier<'a>),
    /// `VIRTUAL`
    Virtual(Span),
    /// `PERSISTENT`
    Persistent(Span),
    /// `STORED`
    Stored(Span),
    /// `UNIQUE` that is not followed by `KEY`
    Unique(Span),
    /// `UNIQUE KEY`; the span covers both keywords
    UniqueKey(Span),
    /// `GENERATED ALWAYS`, or `GENERATED ALWAYS AS IDENTITY` when parsing
    /// with the PostgreSQL dialect
    GeneratedAlways(Span),
    /// `AUTO_INCREMENT`
    AutoIncrement(Span),
    /// `PRIMARY KEY`
    PrimaryKey(Span),
    /// `AS (<expression>)`: span of the `AS` keyword and the parenthesised expression
    As((Span, Box<Expression<'a>>)),
    /// `CHECK (<expression>)`: span of the `CHECK` keyword and the parenthesised expression
    Check((Span, Box<Expression<'a>>)),
}
47
48impl<'a> Spanned for DataTypeProperty<'a> {
49    fn span(&self) -> Span {
50        match &self {
51            DataTypeProperty::Signed(v) => v.span(),
52            DataTypeProperty::Unsigned(v) => v.span(),
53            DataTypeProperty::Zerofill(v) => v.span(),
54            DataTypeProperty::Null(v) => v.span(),
55            DataTypeProperty::NotNull(v) => v.span(),
56            DataTypeProperty::Default(v) => v.span(),
57            DataTypeProperty::Comment(v) => v.span(),
58            DataTypeProperty::Charset(v) => v.span(),
59            DataTypeProperty::Collate(v) => v.span(),
60            DataTypeProperty::Virtual(v) => v.span(),
61            DataTypeProperty::Persistent(v) => v.span(),
62            DataTypeProperty::Stored(v) => v.span(),
63            DataTypeProperty::Unique(v) => v.span(),
64            DataTypeProperty::UniqueKey(v) => v.span(),
65            DataTypeProperty::GeneratedAlways(v) => v.span(),
66            DataTypeProperty::AutoIncrement(v) => v.span(),
67            DataTypeProperty::As((s, v)) => s.join_span(v),
68            DataTypeProperty::Check((s, v)) => s.join_span(v),
69            DataTypeProperty::PrimaryKey(v) => v.span(),
70        }
71    }
72}
73
/// Arguments of a `TIMESTAMP` type
#[derive(Debug, Clone)]
pub struct Timestamp {
    /// Optional parenthesised integer following `TIMESTAMP`, with the span
    /// returned by the parser for that integer
    pub width: Option<(usize, Span)>,
    /// Span of `WITH TIME ZONE` when it follows the type, otherwise `None`
    pub with_time_zone: Option<Span>,
}
79
80impl OptSpanned for Timestamp {
81    fn opt_span(&self) -> Option<Span> {
82        self.width.opt_span().opt_join_span(&self.with_time_zone)
83    }
84}
85
/// Type of datatype
///
/// Variants holding `Option<(usize, Span)>` store the optional parenthesised
/// integer argument that may follow the type keyword, together with the span
/// returned by the parser for it. The span of the type keyword itself is not
/// stored here; it lives in `DataType::identifier`.
#[derive(Debug, Clone)]
pub enum Type<'a> {
    /// `BOOLEAN`
    Boolean,
    /// `TINYINT`, optionally with a width
    TinyInt(Option<(usize, Span)>),
    /// `SMALLINT`, optionally with a width
    SmallInt(Option<(usize, Span)>),
    /// `INTEGER`, optionally with a width
    Integer(Option<(usize, Span)>),
    /// `INT`, optionally with a width
    Int(Option<(usize, Span)>),
    /// `BIGINT`, optionally with a width
    BigInt(Option<(usize, Span)>),
    /// `CHAR`, optionally with a width
    Char(Option<(usize, Span)>),
    /// `VARCHAR`, optionally with a width
    VarChar(Option<(usize, Span)>),
    /// `TINYTEXT`, optionally with a width
    TinyText(Option<(usize, Span)>),
    /// `MEDIUMTEXT`, optionally with a width
    MediumText(Option<(usize, Span)>),
    /// `TEXT`, optionally with a width
    Text(Option<(usize, Span)>),
    /// `LONGTEXT`, optionally with a width
    LongText(Option<(usize, Span)>),
    /// `ENUM(...)` with its list of string values
    Enum(Vec<SString<'a>>),
    /// `SET(...)` with its list of string values
    Set(Vec<SString<'a>>),
    /// `FLOAT8`
    Float8,
    /// `FLOAT`; also produced for `REAL` outside the SQLite dialect.
    /// The parser in this file currently always stores `None`.
    Float(Option<(usize, usize, Span)>),
    /// `DOUBLE` (`DOUBLE PRECISION` in the PostgreSQL dialect); also produced
    /// for `REAL` in the SQLite dialect.
    /// The parser in this file currently always stores `None`.
    Double(Option<(usize, usize, Span)>),
    /// `NUMERIC(a, b)`: the two integers and the span from `(` to `)`
    Numeric(usize, usize, Span),
    /// `DATETIME`, optionally with a width
    DateTime(Option<(usize, Span)>),
    /// `TIMESTAMP` with its optional width and `WITH TIME ZONE` marker
    Timestamp(Timestamp),
    /// `TIMESTAMPTZ`
    Timestamptz,
    /// `TIME`, optionally with a width
    Time(Option<(usize, Span)>),
    /// `TINYBLOB`, optionally with a width
    TinyBlob(Option<(usize, Span)>),
    /// `MEDIUMBLOB`, optionally with a width
    MediumBlob(Option<(usize, Span)>),
    /// `DATE`
    Date,
    /// `BLOB`, optionally with a width
    Blob(Option<(usize, Span)>),
    /// `LONGBLOB`, optionally with a width
    LongBlob(Option<(usize, Span)>),
    /// `VARBINARY(n)`; the width is required
    VarBinary((usize, Span)),
    /// `BINARY`, optionally with a width
    Binary(Option<(usize, Span)>),
    /// Any other identifier used as a type name; only accepted when parsing
    /// with the PostgreSQL dialect. Holds the span of that identifier.
    Named(Span),
    /// `JSON`
    Json,
    /// `BIT(n)`; the width is required
    Bit(usize, Span),
    /// `BYTEA`
    Bytea,
    /// `INET4`
    Inet4,
    /// `INET6`
    Inet6,
}
125
126impl<'a> OptSpanned for Type<'a> {
127    fn opt_span(&self) -> Option<Span> {
128        match &self {
129            Type::Boolean => None,
130            Type::TinyInt(v) => v.opt_span(),
131            Type::SmallInt(v) => v.opt_span(),
132            Type::Integer(v) => v.opt_span(),
133            Type::Int(v) => v.opt_span(),
134            Type::BigInt(v) => v.opt_span(),
135            Type::Char(v) => v.opt_span(),
136            Type::VarChar(v) => v.opt_span(),
137            Type::TinyText(v) => v.opt_span(),
138            Type::MediumText(v) => v.opt_span(),
139            Type::Text(v) => v.opt_span(),
140            Type::LongText(v) => v.opt_span(),
141            Type::Enum(v) => v.opt_span(),
142            Type::Set(v) => v.opt_span(),
143            Type::Float8 => None,
144            Type::Float(v) => v.opt_span(),
145            Type::Double(v) => v.opt_span(),
146            Type::Numeric(_, _, v) => v.opt_span(),
147            Type::DateTime(v) => v.opt_span(),
148            Type::Timestamp(v) => v.opt_span(),
149            Type::Time(v) => v.opt_span(),
150            Type::TinyBlob(v) => v.opt_span(),
151            Type::MediumBlob(v) => v.opt_span(),
152            Type::Date => None,
153            Type::Blob(v) => v.opt_span(),
154            Type::LongBlob(v) => v.opt_span(),
155            Type::VarBinary(v) => v.opt_span(),
156            Type::Binary(v) => v.opt_span(),
157            Type::Timestamptz => None,
158            Type::Named(v) => v.opt_span(),
159            Type::Json => None,
160            Type::Bit(_, b) => b.opt_span(),
161            Type::Bytea => None,
162            Type::Inet4 => None,
163            Type::Inet6 => None,
164        }
165    }
166}
167
/// Type of data
///
/// The parsed form of a column type: the type keyword, its arguments, and
/// the properties that follow it.
#[derive(Debug, Clone)]
pub struct DataType<'a> {
    /// Span of type_ identifier, i.e. of the type keyword(s) in the source
    pub identifier: Span,
    /// Type with width
    pub type_: Type<'a>,
    /// Properties on type, in the order they were parsed
    pub properties: Vec<DataTypeProperty<'a>>,
}
178
179impl<'a> Spanned for DataType<'a> {
180    fn span(&self) -> Span {
181        self.identifier
182            .join_span(&self.type_)
183            .join_span(&self.properties)
184    }
185}
186fn parse_width(parser: &mut Parser<'_, '_>) -> Result<Option<(usize, Span)>, ParseError> {
187    if !matches!(parser.token, Token::LParen) {
188        return Ok(None);
189    }
190    parser.consume_token(Token::LParen)?;
191    let value = parser.recovered(")", &|t| t == &Token::RParen, |parser| parser.consume_int())?;
192    parser.consume_token(Token::RParen)?;
193    Ok(Some(value))
194}
195
196fn parse_width_req(parser: &mut Parser<'_, '_>) -> Result<(usize, Span), ParseError> {
197    if !matches!(parser.token, Token::LParen) {
198        return parser.expected_failure("'('");
199    }
200    Ok(parse_width(parser)?.expect("width"))
201}
202
203fn parse_enum_set_values<'a>(parser: &mut Parser<'a, '_>) -> Result<Vec<SString<'a>>, ParseError> {
204    parser.consume_token(Token::LParen)?;
205    let mut ans = Vec::new();
206    parser.recovered(")", &|t| t == &Token::RParen, |parser| {
207        loop {
208            ans.push(parser.consume_string()?);
209            match &parser.token {
210                Token::Comma => {
211                    parser.consume_token(Token::Comma)?;
212                }
213                Token::RParen => break,
214                _ => parser.expected_failure("',' or ')'")?,
215            }
216        }
217        Ok(())
218    })?;
219    parser.consume_token(Token::RParen)?;
220    Ok(ans)
221}
222
223pub(crate) fn parse_data_type<'a>(
224    parser: &mut Parser<'a, '_>,
225    no_as: bool,
226) -> Result<DataType<'a>, ParseError> {
227    let (identifier, type_) = match &parser.token {
228        Token::Ident(_, Keyword::BOOLEAN) => {
229            (parser.consume_keyword(Keyword::BOOLEAN)?, Type::Boolean)
230        }
231        Token::Ident(_, Keyword::TINYINT) => (
232            parser.consume_keyword(Keyword::TINYINT)?,
233            Type::TinyInt(parse_width(parser)?),
234        ),
235        Token::Ident(_, Keyword::SMALLINT) => (
236            parser.consume_keyword(Keyword::SMALLINT)?,
237            Type::SmallInt(parse_width(parser)?),
238        ),
239        Token::Ident(_, Keyword::INTEGER) => (
240            parser.consume_keyword(Keyword::INTEGER)?,
241            Type::Integer(parse_width(parser)?),
242        ),
243        Token::Ident(_, Keyword::INT) => (
244            parser.consume_keyword(Keyword::INT)?,
245            Type::Int(parse_width(parser)?),
246        ),
247        Token::Ident(_, Keyword::BIGINT) => (
248            parser.consume_keyword(Keyword::BIGINT)?,
249            Type::BigInt(parse_width(parser)?),
250        ),
251        Token::Ident(_, Keyword::INET4) => (parser.consume_keyword(Keyword::INET4)?, Type::Inet4),
252        Token::Ident(_, Keyword::INET6) => (parser.consume_keyword(Keyword::INET6)?, Type::Inet6),
253        Token::Ident(_, Keyword::TINYTEXT) => (
254            parser.consume_keyword(Keyword::TINYTEXT)?,
255            Type::TinyText(parse_width(parser)?),
256        ),
257        Token::Ident(_, Keyword::CHAR) => (
258            parser.consume_keyword(Keyword::CHAR)?,
259            Type::Char(parse_width(parser)?),
260        ),
261        Token::Ident(_, Keyword::TEXT) => (
262            parser.consume_keyword(Keyword::TEXT)?,
263            Type::Text(parse_width(parser)?),
264        ),
265        Token::Ident(_, Keyword::MEDIUMTEXT) => (
266            parser.consume_keyword(Keyword::MEDIUMTEXT)?,
267            Type::MediumText(parse_width(parser)?),
268        ),
269        Token::Ident(_, Keyword::LONGTEXT) => (
270            parser.consume_keyword(Keyword::LONGTEXT)?,
271            Type::LongText(parse_width(parser)?),
272        ),
273        Token::Ident(_, Keyword::VARCHAR) => (
274            parser.consume_keyword(Keyword::VARCHAR)?,
275            Type::VarChar(parse_width(parser)?),
276        ),
277        Token::Ident(_, Keyword::TINYBLOB) => (
278            parser.consume_keyword(Keyword::TINYBLOB)?,
279            Type::TinyBlob(parse_width(parser)?),
280        ),
281        Token::Ident(_, Keyword::BLOB) => (
282            parser.consume_keyword(Keyword::BLOB)?,
283            Type::Blob(parse_width(parser)?),
284        ),
285        Token::Ident(_, Keyword::MEDIUMBLOB) => (
286            parser.consume_keyword(Keyword::MEDIUMBLOB)?,
287            Type::MediumBlob(parse_width(parser)?),
288        ),
289        Token::Ident(_, Keyword::LONGBLOB) => (
290            parser.consume_keyword(Keyword::LONGBLOB)?,
291            Type::LongBlob(parse_width(parser)?),
292        ),
293        Token::Ident(_, Keyword::VARBINARY) => (
294            parser.consume_keyword(Keyword::VARBINARY)?,
295            Type::VarBinary(parse_width_req(parser)?),
296        ),
297        Token::Ident(_, Keyword::BINARY) => (
298            parser.consume_keyword(Keyword::BINARY)?,
299            Type::Binary(parse_width(parser)?),
300        ),
301        Token::Ident(_, Keyword::FLOAT8) => {
302            (parser.consume_keyword(Keyword::FLOAT8)?, Type::Float8)
303        }
304        Token::Ident(_, Keyword::REAL) => {
305            let i = parser.consume_keyword(Keyword::REAL)?;
306            if parser.options.dialect.is_sqlite() {
307                (i, Type::Double(None))
308            } else {
309                (i, Type::Float(None))
310            }
311        }
312        Token::Ident(_, Keyword::FLOAT) => {
313            (parser.consume_keyword(Keyword::FLOAT)?, Type::Float(None)) // TODO
314        }
315        Token::Ident(_, Keyword::DOUBLE) => {
316            let i = if parser.options.dialect.is_postgresql() {
317                parser.consume_keywords(&[Keyword::DOUBLE, Keyword::PRECISION])?
318            } else {
319                parser.consume_keyword(Keyword::DOUBLE)?
320            };
321            (i, Type::Double(None)) // TODO
322        }
323        Token::Ident(_, Keyword::NUMERIC) => {
324            let numeric = parser.consume_keyword(Keyword::NUMERIC)?;
325            let left = parser.consume_token(Token::LParen)?;
326            let (v1, s1) = parser.consume_int()?;
327            let comma = parser.consume_token(Token::Comma)?;
328            let (v2, s2) = parser.consume_int()?;
329            let right = parser.consume_token(Token::RParen)?;
330            (
331                numeric,
332                Type::Numeric(
333                    v1,
334                    v2,
335                    left.join_span(&s1)
336                        .join_span(&comma)
337                        .join_span(&s2)
338                        .join_span(&right),
339                ),
340            )
341        }
342        Token::Ident(_, Keyword::DATETIME) => (
343            parser.consume_keyword(Keyword::DATETIME)?,
344            Type::DateTime(parse_width(parser)?),
345        ),
346        Token::Ident(_, Keyword::TIME) => (
347            parser.consume_keyword(Keyword::TIME)?,
348            Type::Time(parse_width(parser)?),
349        ),
350        Token::Ident(_, Keyword::TIMESTAMPTZ) => (
351            parser.consume_keyword(Keyword::TIMESTAMPTZ)?,
352            Type::Timestamptz,
353        ),
354        Token::Ident(_, Keyword::TIMESTAMP) => {
355            let timestamp_span = parser.consume_keyword(Keyword::TIMESTAMP)?;
356            let width = parse_width(parser)?;
357            let with_time_zone = match parser.skip_keyword(Keyword::WITH) {
358                Some(with_span) => Some(
359                    with_span.join_span(&parser.consume_keywords(&[Keyword::TIME, Keyword::ZONE])?),
360                ),
361                None => None,
362            };
363            let timestamp = Timestamp {
364                width,
365                with_time_zone,
366            };
367            (timestamp_span, Type::Timestamp(timestamp))
368        }
369        Token::Ident(_, Keyword::DATE) => (parser.consume_keyword(Keyword::DATE)?, Type::Date),
370        Token::Ident(_, Keyword::ENUM) => (
371            parser.consume_keyword(Keyword::ENUM)?,
372            Type::Enum(parse_enum_set_values(parser)?),
373        ),
374        Token::Ident(_, Keyword::SET) => (
375            parser.consume_keyword(Keyword::SET)?,
376            Type::Set(parse_enum_set_values(parser)?),
377        ),
378        Token::Ident(_, Keyword::JSON) => (parser.consume_keyword(Keyword::JSON)?, Type::Json),
379        Token::Ident(_, Keyword::BYTEA) => (parser.consume_keyword(Keyword::BYTEA)?, Type::Bytea),
380        Token::Ident(_, Keyword::BIT) => {
381            let t = parser.consume_keyword(Keyword::BIT)?;
382            let (w, ws) = parse_width_req(parser)?;
383            (t, Type::Bit(w, ws))
384        }
385        Token::Ident(_, _) if parser.options.dialect.is_postgresql() => {
386            let name = parser.consume();
387            (name.clone(), Type::Named(name))
388        }
389        _ => parser.expected_failure("type")?,
390    };
391    let mut properties = Vec::new();
392    loop {
393        match parser.token {
394            Token::Ident(_, Keyword::SIGNED) => properties.push(DataTypeProperty::Signed(
395                parser.consume_keyword(Keyword::SIGNED)?,
396            )),
397            Token::Ident(_, Keyword::AUTO_INCREMENT) => properties.push(
398                DataTypeProperty::AutoIncrement(parser.consume_keyword(Keyword::AUTO_INCREMENT)?),
399            ),
400            Token::Ident(_, Keyword::UNSIGNED) => properties.push(DataTypeProperty::Unsigned(
401                parser.consume_keyword(Keyword::UNSIGNED)?,
402            )),
403            Token::Ident(_, Keyword::ZEROFILL) => properties.push(DataTypeProperty::Zerofill(
404                parser.consume_keyword(Keyword::ZEROFILL)?,
405            )),
406            Token::Ident(_, Keyword::NULL) => properties.push(DataTypeProperty::Null(
407                parser.consume_keyword(Keyword::NULL)?,
408            )),
409            Token::Ident(_, Keyword::NOT) => {
410                let start = parser.consume_keyword(Keyword::NOT)?.start;
411                properties.push(DataTypeProperty::NotNull(
412                    start..parser.consume_keyword(Keyword::NULL)?.end,
413                ));
414            }
415            Token::Ident(_, Keyword::CHARACTER) => {
416                parser.consume_keywords(&[Keyword::CHARACTER, Keyword::SET])?;
417                properties.push(DataTypeProperty::Charset(
418                    parser.consume_plain_identifier()?,
419                ));
420            }
421            Token::Ident(_, Keyword::COLLATE) => {
422                parser.consume_keyword(Keyword::COLLATE)?;
423                properties.push(DataTypeProperty::Charset(
424                    parser.consume_plain_identifier()?,
425                ));
426            }
427            Token::Ident(_, Keyword::COMMENT) => {
428                parser.consume_keyword(Keyword::COMMENT)?;
429                properties.push(DataTypeProperty::Comment(parser.consume_string()?));
430            }
431            Token::Ident(_, Keyword::DEFAULT) => {
432                parser.consume_keyword(Keyword::DEFAULT)?;
433                properties.push(DataTypeProperty::Default(Box::new(parse_expression(
434                    parser, true,
435                )?)));
436            }
437            Token::Ident(_, Keyword::VIRTUAL) => properties.push(DataTypeProperty::Virtual(
438                parser.consume_keyword(Keyword::VIRTUAL)?,
439            )),
440            Token::Ident(_, Keyword::PERSISTENT) => properties.push(DataTypeProperty::Persistent(
441                parser.consume_keyword(Keyword::PERSISTENT)?,
442            )),
443            Token::Ident(_, Keyword::STORED) => properties.push(DataTypeProperty::Stored(
444                parser.consume_keyword(Keyword::STORED)?,
445            )),
446            Token::Ident(_, Keyword::UNIQUE) => {
447                let span = parser.consume_keyword(Keyword::UNIQUE)?;
448                if let Some(s2) = parser.skip_keyword(Keyword::KEY) {
449                    properties.push(DataTypeProperty::UniqueKey(s2.join_span(&span)));
450                } else {
451                    properties.push(DataTypeProperty::Unique(span));
452                }
453            }
454            Token::Ident(_, Keyword::GENERATED) => {
455                if parser.options.dialect.is_postgresql() {
456                    properties.push(DataTypeProperty::GeneratedAlways(parser.consume_keywords(
457                        &[
458                            Keyword::GENERATED,
459                            Keyword::ALWAYS,
460                            Keyword::AS,
461                            Keyword::IDENTITY,
462                        ],
463                    )?))
464                } else {
465                    properties.push(DataTypeProperty::GeneratedAlways(
466                        parser.consume_keywords(&[Keyword::GENERATED, Keyword::ALWAYS])?,
467                    ))
468                }
469            }
470            Token::Ident(_, Keyword::AS) if !no_as => {
471                let span = parser.consume_keyword(Keyword::AS)?;
472                let s1 = parser.consume_token(Token::LParen)?;
473                let e = parser.recovered(")", &|t| t == &Token::RParen, |parser| {
474                    Ok(Some(parse_expression(parser, false)?))
475                })?;
476                let s2 = parser.consume_token(Token::RParen)?;
477                let e = e.unwrap_or_else(|| Expression::Invalid(s1.join_span(&s2)));
478                properties.push(DataTypeProperty::As((span, Box::new(e))));
479            }
480            Token::Ident(_, Keyword::PRIMARY) => properties.push(DataTypeProperty::PrimaryKey(
481                parser.consume_keywords(&[Keyword::PRIMARY, Keyword::KEY])?,
482            )),
483            Token::Ident(_, Keyword::CHECK) => {
484                let span = parser.consume_keyword(Keyword::CHECK)?;
485                let s1 = parser.consume_token(Token::LParen)?;
486                let e = parser.recovered(")", &|t| t == &Token::RParen, |parser| {
487                    Ok(Some(parse_expression(parser, false)?))
488                })?;
489                let s2 = parser.consume_token(Token::RParen)?;
490                let e = e.unwrap_or_else(|| Expression::Invalid(s1.join_span(&s2)));
491                properties.push(DataTypeProperty::Check((span, Box::new(e))));
492            }
493            _ => break,
494        }
495    }
496    // TODO validate properties order
497    // TODO validate allowed properties
498    Ok(DataType {
499        identifier,
500        type_,
501        properties,
502    })
503}