sqlparser_mysql/base/
data_type.rs

use std::fmt;
use std::str::FromStr;

use nom::branch::alt;
use nom::bytes::complete::{tag, tag_no_case};
use nom::character::complete::multispace0;
use nom::combinator::{map, opt};
use nom::error::{ErrorKind, ParseError};
use nom::sequence::{delimited, preceded, terminated, tuple};
use nom::Err as NomErr;
use nom::IResult;

use base::error::ParseSQLError;
use base::{CommonParser, Literal};
13
/// A SQL column data type, as produced by [`DataType::type_identifier`].
///
/// Numeric payloads hold the parenthesised argument of the type, e.g. the
/// `255` in `VARCHAR(255)`.
#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
pub enum DataType {
    /// `BOOL` or `BOOLEAN`.
    Bool,
    /// `CHAR(n)`; a trailing `BINARY` keyword is accepted by the parser and dropped.
    Char(u16),
    /// `VARCHAR(n)`; a trailing `BINARY` keyword is accepted by the parser and dropped.
    Varchar(u16),
    /// `INT`, `INTEGER` or `SMALLINT` (all three parse to this variant).
    /// The parser stores 32 when no `(n)` is written.
    Int(u16),
    /// Same as [`DataType::Int`], followed by `UNSIGNED`.
    UnsignedInt(u16),
    /// `BIGINT`; the parser stores 1 when no `(n)` is written.
    Bigint(u16),
    /// `BIGINT ... UNSIGNED`.
    UnsignedBigint(u16),
    /// `TINYINT`; the parser stores 1 when no `(n)` is written.
    Tinyint(u16),
    /// `TINYINT ... UNSIGNED`.
    UnsignedTinyint(u16),
    /// `BLOB`.
    Blob,
    /// `LONGBLOB`.
    Longblob,
    /// `MEDIUMBLOB`.
    Mediumblob,
    /// `TINYBLOB`.
    Tinyblob,
    /// `DOUBLE`; an optional `SIGNED`/`UNSIGNED` suffix is parsed and dropped.
    Double,
    /// `FLOAT`; an optional precision is parsed and dropped.
    Float,
    /// `REAL`; an optional `SIGNED`/`UNSIGNED` suffix is parsed and dropped.
    Real,
    /// `TINYTEXT`.
    Tinytext,
    /// `MEDIUMTEXT`.
    Mediumtext,
    /// `LONGTEXT`.
    Longtext,
    /// `TEXT`.
    Text,
    /// `JSON`.
    Json,
    /// `UUID`.
    Uuid,
    /// `DATE`.
    Date,
    /// `DATETIME` with its optional parenthesised argument (called `fsp` in
    /// the parser); 0 when the argument is omitted.
    DateTime(u16),
    /// `TIMESTAMP`; an optional parenthesised argument is parsed and dropped.
    Timestamp,
    /// `BINARY(n)`.
    Binary(u16),
    /// `VARBINARY(n)`.
    Varbinary(u16),
    /// `ENUM(...)` together with its list of values. Note that the `Display`
    /// implementation prints the literal text `ENUM(...)`, not the values.
    Enum(Vec<Literal>),
    /// `DECIMAL` or `NUMERIC` as `(m, d)`. The parser stores `(32, 0)` when no
    /// precision is written and `d = 0` when only `m` is given.
    Decimal(u8, u8),
}
46
47impl fmt::Display for DataType {
48    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
49        match *self {
50            DataType::Bool => write!(f, "BOOL"),
51            DataType::Char(len) => write!(f, "CHAR({})", len),
52            DataType::Varchar(len) => write!(f, "VARCHAR({})", len),
53            DataType::Int(len) => write!(f, "INT({})", len),
54            DataType::UnsignedInt(len) => write!(f, "INT({}) UNSIGNED", len),
55            DataType::Bigint(len) => write!(f, "BIGINT({})", len),
56            DataType::UnsignedBigint(len) => write!(f, "BIGINT({}) UNSIGNED", len),
57            DataType::Tinyint(len) => write!(f, "TINYINT({})", len),
58            DataType::UnsignedTinyint(len) => write!(f, "TINYINT({}) UNSIGNED", len),
59            DataType::Blob => write!(f, "BLOB"),
60            DataType::Longblob => write!(f, "LONGBLOB"),
61            DataType::Mediumblob => write!(f, "MEDIUMBLOB"),
62            DataType::Tinyblob => write!(f, "TINYBLOB"),
63            DataType::Double => write!(f, "DOUBLE"),
64            DataType::Float => write!(f, "FLOAT"),
65            DataType::Real => write!(f, "REAL"),
66            DataType::Tinytext => write!(f, "TINYTEXT"),
67            DataType::Mediumtext => write!(f, "MEDIUMTEXT"),
68            DataType::Longtext => write!(f, "LONGTEXT"),
69            DataType::Text => write!(f, "TEXT"),
70            DataType::Json => write!(f, "JSON"),
71            DataType::Uuid => write!(f, "UUID"),
72            DataType::Date => write!(f, "DATE"),
73            DataType::DateTime(len) => write!(f, "DATETIME({})", len),
74            DataType::Timestamp => write!(f, "TIMESTAMP"),
75            DataType::Binary(len) => write!(f, "BINARY({})", len),
76            DataType::Varbinary(len) => write!(f, "VARBINARY({})", len),
77            DataType::Enum(_) => write!(f, "ENUM(...)"),
78            DataType::Decimal(m, d) => write!(f, "DECIMAL({}, {})", m, d),
79        }
80    }
81}
82
83impl DataType {
84    // A SQL type specifier.
85    pub fn type_identifier(i: &str) -> IResult<&str, DataType, ParseSQLError<&str>> {
86        alt((
87            Self::type_identifier_first_half,
88            Self::type_identifier_second_half,
89        ))(i)
90    }
91
92    fn type_identifier_first_half(i: &str) -> IResult<&str, DataType, ParseSQLError<&str>> {
93        alt((
94            Self::tiny_int,
95            Self::big_int,
96            Self::sql_int_type,
97            map(alt((tag_no_case("BOOLEAN"), tag_no_case("BOOL"))), |_| {
98                DataType::Bool
99            }),
100            map(
101                tuple((
102                    tag_no_case("CHAR"),
103                    CommonParser::delim_digit,
104                    multispace0,
105                    opt(tag_no_case("BINARY")),
106                )),
107                |t| DataType::Char(Self::len_as_u16(t.1)),
108            ),
109            map(
110                preceded(tag_no_case("DATETIME"), opt(CommonParser::delim_digit)),
111                |fsp| {
112                    DataType::DateTime(match fsp {
113                        Some(fsp) => Self::len_as_u16(fsp),
114                        None => 0,
115                    })
116                },
117            ),
118            map(tag_no_case("DATE"), |_| DataType::Date),
119            map(
120                tuple((tag_no_case("DOUBLE"), multispace0, Self::opt_signed)),
121                |_| DataType::Double,
122            ),
123            map(
124                terminated(
125                    preceded(
126                        tag_no_case("ENUM"),
127                        delimited(tag("("), Literal::value_list, tag(")")),
128                    ),
129                    multispace0,
130                ),
131                DataType::Enum,
132            ),
133            map(
134                tuple((
135                    tag_no_case("FLOAT"),
136                    multispace0,
137                    opt(CommonParser::precision),
138                    multispace0,
139                )),
140                |_| DataType::Float,
141            ),
142            map(
143                tuple((tag_no_case("REAL"), multispace0, Self::opt_signed)),
144                |_| DataType::Real,
145            ),
146            map(tag_no_case("TEXT"), |_| DataType::Text),
147            map(tag_no_case("JSON"), |_| DataType::Json),
148            map(tag_no_case("UUID"), |_| DataType::Uuid),
149            map(
150                tuple((
151                    tag_no_case("TIMESTAMP"),
152                    opt(CommonParser::delim_digit),
153                    multispace0,
154                )),
155                |_| DataType::Timestamp,
156            ),
157            map(
158                tuple((
159                    tag_no_case("VARCHAR"),
160                    CommonParser::delim_digit,
161                    multispace0,
162                    opt(tag_no_case("BINARY")),
163                )),
164                |t| DataType::Varchar(Self::len_as_u16(t.1)),
165            ),
166            Self::decimal_or_numeric,
167        ))(i)
168    }
169
170    fn type_identifier_second_half(i: &str) -> IResult<&str, DataType, ParseSQLError<&str>> {
171        alt((
172            map(
173                tuple((
174                    tag_no_case("BINARY"),
175                    CommonParser::delim_digit,
176                    multispace0,
177                )),
178                |t| DataType::Binary(Self::len_as_u16(t.1)),
179            ),
180            map(tag_no_case("BLOB"), |_| DataType::Blob),
181            map(tag_no_case("LONGBLOB"), |_| DataType::Longblob),
182            map(tag_no_case("MEDIUMBLOB"), |_| DataType::Mediumblob),
183            map(tag_no_case("MEDIUMTEXT"), |_| DataType::Mediumtext),
184            map(tag_no_case("LONGTEXT"), |_| DataType::Longtext),
185            map(tag_no_case("TINYBLOB"), |_| DataType::Tinyblob),
186            map(tag_no_case("TINYTEXT"), |_| DataType::Tinytext),
187            map(
188                tuple((
189                    tag_no_case("VARBINARY"),
190                    CommonParser::delim_digit,
191                    multispace0,
192                )),
193                |t| DataType::Varbinary(Self::len_as_u16(t.1)),
194            ),
195        ))(i)
196    }
197
198    // TODO: rather than copy paste these functions, should create a function that returns a parser
199    // based on the sql int type, just like nom does
200    fn tiny_int(i: &str) -> IResult<&str, DataType, ParseSQLError<&str>> {
201        let (remaining_input, (_, _, len, _, signed)) = tuple((
202            tag_no_case("TINYINT"),
203            multispace0,
204            opt(CommonParser::delim_digit),
205            multispace0,
206            Self::opt_signed,
207        ))(i)?;
208
209        match signed {
210            Some(sign) => {
211                if sign.eq_ignore_ascii_case("UNSIGNED") {
212                    Ok((
213                        remaining_input,
214                        DataType::UnsignedTinyint(len.map(Self::len_as_u16).unwrap_or(1)),
215                    ))
216                } else {
217                    Ok((
218                        remaining_input,
219                        DataType::Tinyint(len.map(Self::len_as_u16).unwrap_or(1)),
220                    ))
221                }
222            }
223            None => Ok((
224                remaining_input,
225                DataType::Tinyint(len.map(Self::len_as_u16).unwrap_or(1)),
226            )),
227        }
228    }
229
230    // TODO: rather than copy paste these functions, should create a function that returns a parser
231    // based on the sql int type, just like nom does
232    fn big_int(i: &str) -> IResult<&str, DataType, ParseSQLError<&str>> {
233        let (remaining_input, (_, _, len, _, signed)) = tuple((
234            tag_no_case("BIGINT"),
235            multispace0,
236            opt(CommonParser::delim_digit),
237            multispace0,
238            Self::opt_signed,
239        ))(i)?;
240
241        match signed {
242            Some(sign) => {
243                if sign.eq_ignore_ascii_case("UNSIGNED") {
244                    Ok((
245                        remaining_input,
246                        DataType::UnsignedBigint(len.map(Self::len_as_u16).unwrap_or(1)),
247                    ))
248                } else {
249                    Ok((
250                        remaining_input,
251                        DataType::Bigint(len.map(Self::len_as_u16).unwrap_or(1)),
252                    ))
253                }
254            }
255            None => Ok((
256                remaining_input,
257                DataType::Bigint(len.map(Self::len_as_u16).unwrap_or(1)),
258            )),
259        }
260    }
261
262    // TODO: rather than copy paste these functions, should create a function that returns a parser
263    // based on the sql int type, just like nom does
264    fn sql_int_type(i: &str) -> IResult<&str, DataType, ParseSQLError<&str>> {
265        let (remaining_input, (_, _, len, _, signed)) = tuple((
266            alt((
267                tag_no_case("INTEGER"),
268                tag_no_case("INT"),
269                tag_no_case("SMALLINT"),
270            )),
271            multispace0,
272            opt(CommonParser::delim_digit),
273            multispace0,
274            Self::opt_signed,
275        ))(i)?;
276
277        match signed {
278            Some(sign) => {
279                if sign.eq_ignore_ascii_case("UNSIGNED") {
280                    Ok((
281                        remaining_input,
282                        DataType::UnsignedInt(len.map(Self::len_as_u16).unwrap_or(32)),
283                    ))
284                } else {
285                    Ok((
286                        remaining_input,
287                        DataType::Int(len.map(Self::len_as_u16).unwrap_or(32)),
288                    ))
289                }
290            }
291            None => Ok((
292                remaining_input,
293                DataType::Int(len.map(Self::len_as_u16).unwrap_or(32)),
294            )),
295        }
296    }
297
298    // TODO(malte): not strictly ok to treat DECIMAL and NUMERIC as identical; the
299    // former has "at least" M precision, the latter "exactly".
300    // See https://dev.mysql.com/doc/refman/5.7/en/precision-math-decimal-characteristics.html
301    fn decimal_or_numeric(i: &str) -> IResult<&str, DataType, ParseSQLError<&str>> {
302        let (remaining_input, precision) = delimited(
303            alt((tag_no_case("DECIMAL"), tag_no_case("NUMERIC"))),
304            opt(CommonParser::precision),
305            multispace0,
306        )(i)?;
307
308        match precision {
309            None => Ok((remaining_input, DataType::Decimal(32, 0))),
310            Some((m, None)) => Ok((remaining_input, DataType::Decimal(m, 0))),
311            Some((m, Some(d))) => Ok((remaining_input, DataType::Decimal(m, d))),
312        }
313    }
314
315    fn opt_signed(i: &str) -> IResult<&str, Option<&str>, ParseSQLError<&str>> {
316        opt(alt((tag_no_case("UNSIGNED"), tag_no_case("SIGNED"))))(i)
317    }
318
319    #[inline]
320    fn len_as_u16(len: &str) -> u16 {
321        match u16::from_str(len) {
322            Ok(v) => v,
323            Err(e) => panic!("{}", e),
324        }
325    }
326}
327
#[cfg(test)]
mod tests {
    use base::DataType;

    /// Checks a few specifiers that must parse to a known type and one that
    /// must be rejected.
    #[test]
    fn sql_types() {
        // Each accepted input is paired with the type it has to produce.
        let accepted = [
            ("bool", DataType::Bool),
            ("integer(16)", DataType::Int(16)),
            ("datetime(16)", DataType::DateTime(16)),
        ];
        for (input, expected) in accepted.iter() {
            let (_, parsed) = DataType::type_identifier(input).unwrap();
            assert_eq!(&parsed, expected);
        }

        // `VARCHAR` without a length is not a valid specifier.
        let rejected = ["varchar"];
        for input in rejected.iter() {
            assert!(!DataType::type_identifier(input).is_ok());
        }
    }
}