parse_mediawiki_sql/
from_sql.rs

/*!
Defines the [`FromSql`] trait and implements it for external types.
*/
4
5use bstr::B;
6use either::Either;
7use nom::{
8    branch::alt,
9    bytes::streaming::{escaped_transform, is_not, tag},
10    character::streaming::{char, digit1, one_of},
11    combinator::{map, map_res, opt, recognize},
12    error::context,
13    number::streaming::recognize_float,
14    sequence::{preceded, terminated, tuple},
15};
16use ordered_float::NotNan;
17
/// Result type returned by every [`FromSql`] parser in this module: a
/// [`nom::IResult`] whose input is a byte slice and whose error type is
/// [`crate::error::Error`].
pub type IResult<'a, T> = nom::IResult<&'a [u8], T, crate::error::Error<'a>>;
19
20/**
21Trait for converting from the SQL syntax for a simple type
22(anything other than a tuple) to a Rust type,
23which can borrow from the string or not.
24Used by [`schemas::FromSqlTuple`][crate::FromSqlTuple].
25*/
26pub trait FromSql<'a>: Sized {
27    fn from_sql(s: &'a [u8]) -> IResult<'a, Self>;
28}
29
30/// Parses a [`bool`] from `1` or `0`.
31impl<'a> FromSql<'a> for bool {
32    fn from_sql(s: &'a [u8]) -> IResult<'a, Self> {
33        context("1 or 0", map(one_of("01"), |b| b == '1'))(s)
34    }
35}
36
// Generates `FromSql` impls for numeric types.
//
// Parsing succeeds whenever the SQL file is valid and the parser is using the
// correct numeric types; otherwise the failed conversion is reported as a
// parse error through `map_res`.
macro_rules! number_impl {
    // `Type { recognizer }`: the recognizer yields the raw bytes of the
    // literal, which are decoded as UTF-8 and then parsed with `str::parse`.
    (
        $( #[doc = $doc:expr] )*
        $number:ty
        $recognizer:block
    ) => {
        $( #[doc = $doc] )*
        impl<'a> FromSql<'a> for $number {
            fn from_sql(s: &'a [u8]) -> IResult<'a, $number> {
                context(
                    concat!("number (", stringify!($number), ")"),
                    map_res($recognizer, |raw: &[u8]| {
                        // UTF-8 failures go in `Either::Right`,
                        // number-parsing failures in `Either::Left`.
                        std::str::from_utf8(raw)
                            .map_err(Either::Right)
                            .and_then(|text| text.parse().map_err(Either::Left))
                    }),
                )(s)
            }
        }
    };
    // `Type { parser } { convert }`: runs an existing parser and passes its
    // output through an infallible conversion.
    (
        $( #[doc = $doc:expr] )*
        $number:ty
        $parser:block
        $convert:block
    ) => {
        $( #[doc = $doc] )*
        impl<'a> FromSql<'a> for $number {
            fn from_sql(s: &'a [u8]) -> IResult<'a, $number> {
                context(
                    concat!("number (", stringify!($number), ")"),
                    map($parser, $convert),
                )(s)
            }
        }
    };
}
75
76macro_rules! unsigned_int {
77    ($t:ident) => {
78        number_impl! { $t { recognize(digit1) } }
79    };
80}
81
82unsigned_int!(u8);
83unsigned_int!(u16);
84unsigned_int!(u32);
85unsigned_int!(u64);
86
87macro_rules! signed_int {
88    ($t:ident) => {
89        number_impl! { $t { recognize(tuple((opt(char('-')), digit1))) } }
90    };
91}
92
93signed_int!(i8);
94signed_int!(i16);
95signed_int!(i32);
96signed_int!(i64);
97
98macro_rules! float {
99    ($t:ident) => {
100        number_impl! {
101            #[doc = concat!("Matches a float literal with [`recognize_float`] and parses it as a [`", stringify!($t), "`].")]
102            $t { recognize_float }
103        }
104
105        number_impl! {
106            // Link to `<$t as FromSql>::from_sql` when https://github.com/rust-lang/rust/issues/74563 is resolved.
107            #[doc = concat!("Parses an [`", stringify!($t), "`] and wraps it with [`NotNan::new_unchecked`].")]
108            ///
109            /// # Safety
110            /// This will never accidentally wrap a `NaN` because `nom`'s [`recognize_float`] doesn't include a representation of `NaN`.
111            NotNan<$t> {
112                <$t>::from_sql
113            } {
114                |float| unsafe { NotNan::new_unchecked(float) }
115            }
116        }
117    };
118}
119
120float!(f32);
121float!(f64);
122
123/// Used for byte strings that have no escape sequences.
124impl<'a> FromSql<'a> for &'a [u8] {
125    fn from_sql(s: &'a [u8]) -> IResult<'a, Self> {
126        context(
127            "byte string with no escape sequences",
128            preceded(
129                tag("'"),
130                terminated(
131                    map(opt(is_not(B("'"))), |opt| opt.unwrap_or_else(|| B(""))),
132                    tag("'"),
133                ),
134            ),
135        )(s)
136    }
137}
138
139/// Used for types represented as strings without escape sequences. For instance,
140/// [`Timestamp`](crate::field_types::Timestamp)s matches the regex `^[0-9: -]+$`
141/// and thus never has any escape sequences.
142impl<'a> FromSql<'a> for &'a str {
143    fn from_sql(s: &'a [u8]) -> IResult<'a, Self> {
144        context(
145            "string with no escape sequences",
146            map_res(<&[u8]>::from_sql, std::str::from_utf8),
147        )(s)
148    }
149}
150
151/// Use this for string types that require unescaping and are guaranteed
152/// to be valid UTF-8, like page titles.
153impl<'a> FromSql<'a> for String {
154    fn from_sql(s: &'a [u8]) -> IResult<'a, Self> {
155        context("string", map_res(<Vec<u8>>::from_sql, String::from_utf8))(s)
156    }
157}
158
159/// Used for "strings" that sometimes contain invalid UTF-8, like the
160/// `cl_sortkey` field in the `categorylinks` table, which is truncated to 230
161/// bits, sometimes in the middle of a UTF-8 sequence.
162impl<'a> FromSql<'a> for Vec<u8> {
163    fn from_sql(s: &'a [u8]) -> IResult<'a, Self> {
164        context(
165            "byte string",
166            preceded(
167                tag("'"),
168                terminated(
169                    map(
170                        opt(escaped_transform(
171                            is_not(B("\\\"'")),
172                            '\\',
173                            map(one_of(B(r#"0btnrZ\'""#)), |b| match b {
174                                '0' => B("\0"),
175                                'b' => b"\x08",
176                                't' => b"\t",
177                                'n' => b"\n",
178                                'r' => b"\r",
179                                'Z' => b"\x1A",
180                                '\\' => b"\\",
181                                '\'' => b"'",
182                                '"' => b"\"",
183                                _ => unreachable!(),
184                            }),
185                        )),
186                        |opt| opt.unwrap_or_default(),
187                    ),
188                    tag("'"),
189                ),
190            ),
191        )(s)
192    }
193}
194
195impl<'a> FromSql<'a> for () {
196    fn from_sql(s: &'a [u8]) -> IResult<'a, Self> {
197        context("unit type", map(tag("NULL"), |_| ()))(s)
198    }
199}
200
201impl<'a, T> FromSql<'a> for Option<T>
202where
203    T: FromSql<'a>,
204{
205    fn from_sql(s: &'a [u8]) -> IResult<'a, Self> {
206        context(
207            "optional type",
208            alt((
209                context("“NULL”", map(<()>::from_sql, |_| None)),
210                map(T::from_sql, Some),
211            )),
212        )(s)
213    }
214}