parse_mediawiki_sql/
schemas.rs

1/*!
2Types that represent rows in tables of the
3[MediaWiki database](https://www.mediawiki.org/wiki/Manual:Database_layout).
4
5Implements the [`FromSqlTuple`] trait for them,
6so that they can be parsed from SQL tuples by [`iterate_sql_insertions`](crate::iterate_sql_insertions).
7*/
8
9use nom::{
10    character::streaming::char,
11    combinator::{cut, map, opt},
12    error::context,
13    sequence::{preceded, terminated, tuple},
14};
15
16use crate::{
17    field_types::{
18        ActorId, CategoryId, ChangeTagDefinitionId, ChangeTagId, CommentId, ContentModel, Expiry,
19        ExternalLinkId, FullPageTitle, LinkTargetId, LogId, MajorMime, MediaType, MinorMime,
20        NotNan, PageAction, PageCount, PageId, PageNamespace, PageRestrictionId, PageTitle,
21        PageType, ProtectionLevel, RecentChangeId, RevisionId, Sha1, Timestamp, UserGroup, UserId,
22    },
23    from_sql::{FromSql, IResult},
24    FromSqlTuple,
25};
26
27#[cfg(feature = "serialization")]
28use serde::{Deserialize, Serialize};
29
// Builds a Markdown link to a page on mediawiki.org.
//
// `$label` becomes the link text and `$target` is appended to
// `https://www.mediawiki.org/wiki/`. Both operands are spliced into
// `concat!`, so they must be literals or macros that expand to literals.
// A trailing comma after `$target` is accepted.
macro_rules! mediawiki_link {
    ($label:expr, $target:expr $(,)?) => {
        concat!("[", $label, "](https://www.mediawiki.org/wiki/", $target, ")")
    };
}
38
// Emits one or more items preceded by a computed doc string.
//
// `$doc` is placed in a `#[doc = ...]` attribute directly ahead of the
// items, so the attribute attaches to the first item; the remaining items
// are emitted unchanged. At least one item is required.
macro_rules! with_doc_comment {
    ($doc:expr, $($decl:item)+) => {
        #[doc = $doc]
        $($decl)+
    };
}
48
// Produces the doc string "Represents a row in the [`<table>` table](<url>)."
// for a generated row struct, where the link is built by `mediawiki_link!`.
#[rustfmt::skip]
macro_rules! database_table_doc {
    // Explicit wiki page: link to `$wiki_page` as given.
    ($table:ident, $wiki_page:literal) => {
        concat!(
            "Represents a row in the ",
            mediawiki_link!(
                concat!("`", stringify!($table), "` table"),
                $wiki_page,
            ),
            ".",
        )
    };
    // No page given: link to `Manual:<table>_table`.
    ($table:ident) => {
        concat!(
            "Represents a row in the ",
            mediawiki_link!(
                concat!("`", stringify!($table), "` table"),
                concat!("Manual:", stringify!($table), "_table"),
            ),
            ".",
        )
    };
}
76
// Declares a public row struct for a database table together with its
// `FromSqlTuple` implementation.
//
// Invocation shape (bracketed parts are optional):
//
//     impl_row_from_sql! {
//         table_name[: "Wiki/Page_name"]
//         StructName[<'lifetime>] {
//             #[optional_field_attributes]
//             field: Type,
//             ...
//         }
//     }
//
// * The struct gets the doc string produced by `database_table_doc!` and
//   derives `Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash`, plus
//   `Serialize`/`Deserialize` when the `serialization` feature is enabled.
// * Every field is made `pub`; attributes written on a field are forwarded.
// * `from_sql_tuple` parses `(`, then each field in declaration order with
//   `<Type as FromSql>::from_sql`, then `)`. Field order is therefore the
//   column order expected in the SQL tuple.
// * A trailing comma after the last field is optional in both forms.
macro_rules! impl_row_from_sql {
    // Internal rule shared by both public forms: expands to the body of
    // `from_sql_tuple`, applied to the input slice named by `$input`.
    // Keeping a single copy guarantees both forms report errors with the
    // same context labels.
    (
        @parse $table_name:ident, $output_type:ident, $input:ident,
        $($field_name:ident: $type_name:ty),+
    ) => {{
        // `cut` turns a recoverable error inside the field list into a
        // failure, so a malformed field is reported rather than backtracked.
        let fields = cut(
            map(
                tuple((
                    $(
                        terminated(
                            context(
                                concat!(
                                    "the field “",
                                    stringify!($field_name),
                                    "”"
                                ),
                                <$type_name>::from_sql,
                            ),
                            // The separator is optional after every field,
                            // which also covers the last field (no comma
                            // before the closing parenthesis).
                            opt(char(','))
                        ),
                    )+
                )),
                |($($field_name),+)| $output_type {
                    $($field_name,)+
                }
            )
        );
        context(
            concat!("row of ", stringify!($table_name), " table"),
            preceded(
                char('('),
                terminated(
                    fields,
                    char(')')
                )
            )
        )($input)
    }};
    // Row struct without a lifetime parameter (all fields are owned).
    (
        $table_name:ident $(: $page:literal)?
        $output_type:ident {
            $(
                $(#[$field_meta:meta])*
                $field_name:ident: $type_name:ty
            ),+
            $(,)?
        }
    ) => {
        with_doc_comment! {
            database_table_doc!($table_name $(, $page)?),
            #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
            #[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
            pub struct $output_type {
                $(
                    $(#[$field_meta])*
                    pub $field_name: $type_name,
                )+
            }

            impl<'input> FromSqlTuple<'input> for $output_type {
                fn from_sql_tuple(s: &'input [u8]) -> IResult<'input, Self> {
                    impl_row_from_sql!(
                        @parse $table_name, $output_type, s,
                        $($field_name: $type_name),+
                    )
                }
            }
        }
    };
    // Row struct with a lifetime parameter (some fields borrow from the input).
    (
        $table_name:ident $(: $page:literal)?
        $output_type:ident<$life:lifetime> {
            $(
                $(#[$field_meta:meta])*
                $field_name:ident: $type_name:ty
            ),+
            $(,)?
        }
    ) => {
        with_doc_comment! {
            database_table_doc!($table_name $(, $page)?),
            #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
            #[cfg_attr(feature = "serialization", derive(Serialize, Deserialize))]
            pub struct $output_type<$life> {
                $(
                    $(#[$field_meta])*
                    pub $field_name: $type_name,
                )+
            }

            impl<$life> FromSqlTuple<$life> for $output_type<$life> {
                fn from_sql_tuple(s: &$life [u8]) -> IResult<$life, Self> {
                    impl_row_from_sql!(
                        @parse $table_name, $output_type, s,
                        $($field_name: $type_name),+
                    )
                }
            }
        }
    };
}
196
197impl_row_from_sql! {
198    babel: "Extension:Babel/babel_table"
199    Babel<'input> {
200        user: UserId,
201        lang: &'input str,
202        level: &'input str,
203    }
204}
205
206impl_row_from_sql! {
207    category
208    Category {
209        id: CategoryId,
210        title: PageTitle,
211        pages: PageCount,
212        subcats: PageCount,
213        files: PageCount,
214    }
215}
216
217impl_row_from_sql! {
218    categorylinks
219    CategoryLink {
220        from: PageId,
221        to: PageTitle,
222        /// Can be truncated in the middle of a UTF-8 sequence,
223        /// so cannot be represented as a `String`.
224        sortkey: Vec<u8>,
225        timestamp: Timestamp,
226        /// Values added after
227        /// [this change](https://gerrit.wikimedia.org/r/449280),
228        /// should be valid UTF-8, but older values may be invalid if they have
229        /// been truncated in the middle of a multi-byte sequence.
230        sortkey_prefix: Vec<u8>,
231        collation: String,
232        r#type: PageType,
233    }
234}
235
236impl_row_from_sql! {
237    change_tag
238    ChangeTag {
239        id: ChangeTagId,
240        recent_changes_id: Option<RecentChangeId>,
241        log_id: Option<LogId>,
242        revision_id: Option<RevisionId>,
243        params: Option<String>,
244        tag_id: ChangeTagDefinitionId,
245    }
246}
247
248impl_row_from_sql! {
249    change_tag_def
250    ChangeTagDefinition {
251        id: ChangeTagDefinitionId,
252        name: String,
253        user_defined: bool,
254        count: u64,
255    }
256}
257
258impl_row_from_sql! {
259    externallinks
260    ExternalLink {
261        id: ExternalLinkId,
262        from: PageId,
263        to: String,
264        index: Vec<u8>,
265        index_60: Vec<u8>,
266    }
267}
268
269impl_row_from_sql! {
270    image
271    Image<'input> {
272        name: PageTitle,
273        size: u32,
274        width: i32,
275        height: i32,
276        metadata: String,
277        bits: i32,
278        #[cfg_attr(feature = "serialization", serde(borrow))]
279        media_type: MediaType<'input>,
280        #[cfg_attr(feature = "serialization", serde(borrow))]
281        major_mime: MajorMime<'input>,
282        #[cfg_attr(feature = "serialization", serde(borrow))]
283        minor_mime: MinorMime<'input>,
284        description_id: CommentId,
285        actor: ActorId,
286        timestamp: Timestamp,
287        sha1: Sha1<'input>,
288    }
289}
290
291impl_row_from_sql! {
292    imagelinks
293    ImageLink {
294        from: PageId,
295        to: PageTitle,
296        from_namespace: PageNamespace,
297    }
298}
299
300impl_row_from_sql! {
301    iwlinks
302    InterwikiLink<'input> {
303        from: PageId,
304        #[cfg_attr(feature = "serialization", serde(borrow))]
305        prefix: &'input str,
306        title: PageTitle,
307    }
308}
309
310impl_row_from_sql! {
311    langlinks
312    LanguageLink<'input> {
313        from: PageId,
314        #[cfg_attr(feature = "serialization", serde(borrow))]
315        lang: &'input str,
316        title: FullPageTitle,
317    }
318}
319
320impl_row_from_sql! {
321    linktarget
322    LinkTarget {
323        id: LinkTargetId,
324        namespace: PageNamespace,
325        title: PageTitle,
326    }
327}
328
329impl_row_from_sql! {
330    page_restrictions
331    PageRestriction<'input> {
332        id: PageRestrictionId,
333        page: PageId,
334        #[cfg_attr(feature = "serialization", serde(borrow))]
335        r#type: PageAction<'input>,
336        #[cfg_attr(feature = "serialization", serde(borrow))]
337        level: ProtectionLevel<'input>,
338        cascade: bool,
339        user: Option<u32>,
340        expiry: Option<Expiry>,
341    }
342}
343
344impl_row_from_sql! {
345    page
346    Page<'input> {
347        id: PageId,
348        namespace: PageNamespace,
349        title: PageTitle,
350        is_redirect: bool,
351        is_new: bool,
352        #[cfg_attr(feature = "serialization", serde(serialize_with = "crate::field_types::serialize_not_nan", deserialize_with = "crate::field_types::deserialize_not_nan"))]
353        random: NotNan<f64>,
354        touched: Timestamp,
355        links_updated: Option<Timestamp>,
356        latest: u32,
357        len: u32,
358        #[cfg_attr(feature = "serialization", serde(borrow))]
359        content_model: Option<ContentModel<'input>>,
360        #[cfg_attr(feature = "serialization", serde(borrow))]
361        lang: Option<&'input str>,
362    }
363}
364
365impl_row_from_sql! {
366    pagelinks
367    PageLink {
368        from: PageId,
369        namespace: PageNamespace,
370        title: PageTitle,
371        from_namespace: PageNamespace,
372    }
373}
374
375impl_row_from_sql! {
376    page_props
377    PageProperty<'input> {
378        page: PageId,
379        #[cfg_attr(feature = "serialization", serde(borrow))]
380        name: &'input str,
381        value: Vec<u8>,
382        #[cfg_attr(feature = "serialization", serde(serialize_with = "crate::field_types::serialize_option_not_nan", deserialize_with = "crate::field_types::deserialize_option_not_nan"))]
383        sortkey: Option<NotNan<f64>>,
384    }
385}
386
387impl_row_from_sql! {
388    protected_titles
389    ProtectedTitle<'input> {
390        namespace: PageNamespace,
391        title: PageTitle,
392        user: UserId,
393        reason_id: CommentId,
394        timestamp: Timestamp,
395        expiry: Expiry,
396        #[cfg_attr(feature = "serialization", serde(borrow))]
397        create_perm: ProtectionLevel<'input>,
398    }
399}
400
401impl_row_from_sql! {
402    redirect
403    Redirect<'input> {
404        from: PageId,
405        namespace: PageNamespace,
406        title: PageTitle,
407        #[cfg_attr(feature = "serialization", serde(borrow))]
408        interwiki: Option<&'input str>,
409        fragment: Option<String>,
410    }
411}
412
413impl_row_from_sql! {
414    sites
415    Site<'input> {
416        id: u32,
417        #[cfg_attr(feature = "serialization", serde(borrow))]
418        global_key: &'input str,
419        #[cfg_attr(feature = "serialization", serde(borrow))]
420        r#type: &'input str,
421        #[cfg_attr(feature = "serialization", serde(borrow))]
422        group: &'input str,
423        #[cfg_attr(feature = "serialization", serde(borrow))]
424        source: &'input str,
425        #[cfg_attr(feature = "serialization", serde(borrow))]
426        language: &'input str,
427        #[cfg_attr(feature = "serialization", serde(borrow))]
428        protocol: &'input str,
429        #[cfg_attr(feature = "serialization", serde(borrow))]
430        domain: &'input [u8],
431        data: String,
432        forward: i8,
433        config: String,
434    }
435}
436
437impl_row_from_sql! {
438    site_stats
439    SiteStats {
440        row_id: u32,
441        total_edits: u64,
442        good_articles: u64,
443        total_pages: u64,
444        users: u64,
445        images: u64,
446        active_users: u64,
447    }
448}
449
450impl_row_from_sql! {
451    wbc_entity_usage: "Wikibase/Schema/wbc_entity_usage"
452    WikibaseClientEntityUsage<'input> {
453        row_id: u64,
454        #[cfg_attr(feature = "serialization", serde(borrow))]
455        entity_id: &'input str,
456        #[cfg_attr(feature = "serialization", serde(borrow))]
457        aspect: &'input str,
458        page_id: PageId,
459    }
460}
461
// Parses one `redirect` tuple containing a non-ASCII title, an empty
// interwiki string and a backslash-escaped apostrophe, and checks the parsed
// struct (and its JSON form when the `serialization` feature is enabled).
#[test]
fn test_redirect() {
    use bstr::B;

    let input = r"(605368,1,'разблюто','','Discussion from Stephen G. Brown\'s talk-page')";
    let expected = Redirect {
        from: PageId(605368),
        namespace: PageNamespace(1),
        title: PageTitle("разблюто".to_string()),
        interwiki: Some(""),
        fragment: Some("Discussion from Stephen G. Brown's talk-page".to_string()),
    };

    // The whole input must be consumed, leaving an empty remainder.
    let parsed = Redirect::from_sql_tuple(input.as_bytes());
    assert_eq!(parsed, Ok((B(""), expected)));

    #[cfg(feature = "serialization")]
    assert_eq!(
        serde_json::to_string(&parsed.unwrap().1).unwrap(),
        r#"{"from":605368,"namespace":1,"title":"разблюто","interwiki":"","fragment":"Discussion from Stephen G. Brown's talk-page"}"#,
    );
}
486
487impl_row_from_sql! {
488    templatelinks
489    TemplateLink {
490        from: PageId,
491        from_namespace: PageNamespace,
492        target_id: LinkTargetId,
493    }
494}
495
496impl_row_from_sql! {
497    user_former_groups
498    UserFormerGroupMembership<'input> {
499        user: UserId,
500        #[cfg_attr(feature = "serialization", serde(borrow))]
501        group: UserGroup<'input>,
502    }
503}
504
505impl_row_from_sql! {
506    user_groups
507    UserGroupMembership<'input> {
508        user: UserId,
509        #[cfg_attr(feature = "serialization", serde(borrow))]
510        group: UserGroup<'input>,
511        expiry: Option<Expiry>,
512    }
513}