parse_mediawiki_sql/lib.rs
1/*!
[crates.io](https://crates.io/crates/parse-mediawiki-sql)
[docs.rs](https://docs.rs/parse-mediawiki-sql)
4
5`parse_mediawiki_sql` parses SQL dumps of a MediaWiki database.
6The SQL dumps are scripts that create a database table and insert rows into it.
7The entry point is `iterate_sql_insertions`, which creates an iterable struct
8from a byte slice (`&[u8]`). The struct is generic over the type returned by the iterator,
9and this type must be one of the structs in the [`schemas`](schemas) module,
10which represent rows in the database, such as [`Page`](schemas::Page).
11
12## Usage
This crate is available from [crates.io](https://crates.io/crates/parse-mediawiki-sql) and can be
used by adding `parse-mediawiki-sql` to your dependencies in your project's `Cargo.toml`.
15
16```toml
17[dependencies]
18parse-mediawiki-sql = "0.10"
19```
20
If you're using Rust 2015, then you'll also need to add it to your crate root:
22
23```no_run
24extern crate parse_mediawiki_sql;
25```
26
27## Example
28To generate a `Vec` containing the titles of all redirect pages:
29
30```no_run
31# #[cfg(feature = "utils")]
32# fn main() -> Result<(), Box<dyn std::error::Error>> {
33use parse_mediawiki_sql::{
34 iterate_sql_insertions,
35 schemas::Page,
36 field_types::{PageNamespace, PageTitle},
37 utils::memory_map,
38};
39use std::fs::File;
40let page_sql = unsafe { memory_map("page.sql")? };
41let redirects: Vec<(PageNamespace, PageTitle)> =
42 iterate_sql_insertions(&page_sql)
43 .filter_map(
44 |Page { namespace, title, is_redirect, .. }| {
45 if is_redirect {
46 Some((namespace, title))
47 } else {
48 None
49 }
50 },
51 )
52 .collect();
53# Ok(())
54# }
55# #[cfg(not(feature = "utils"))]
56# fn main() {}
57```
58
59Only a mutable reference to the struct is iterable, so a `for`-loop
60must use `&mut` or `.into_iter()` to iterate over the struct:
61
62```no_run
63# #[cfg(feature = "utils")]
64# fn main() -> Result<(), Box<dyn std::error::Error>> {
65# use parse_mediawiki_sql::{
66# iterate_sql_insertions,
67# schemas::Page,
68# utils::memory_map,
69# };
70# let page_sql =
71# unsafe { memory_map("page.sql")? };
72for Page { namespace, title, is_redirect, .. } in &mut iterate_sql_insertions(&page_sql) {
73 if is_redirect {
74 dbg!((namespace, title));
75 }
76}
77# Ok(())
78# }
79# #[cfg(not(feature = "utils"))]
80# fn main() {}
81```
82*/
83
84#![cfg_attr(docsrs, feature(doc_cfg))]
85
86use bstr::{ByteSlice, B};
87use nom::{
88 branch::alt,
89 bytes::streaming::{tag, take_while},
90 character::streaming::multispace0,
91 combinator::{iterator, opt, recognize, ParserIterator},
92 sequence::{preceded, tuple},
93};
94
95pub mod error;
96pub mod field_types;
97pub mod from_sql;
98pub mod schemas;
99
100pub use error::Error;
101pub use from_sql::IResult;
102#[cfg(feature = "utils")]
103#[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
104pub mod utils;
105
106/**
107Trait for converting from a SQL tuple to a Rust type,
108which can borrow from the string or not.
109Used by [`iterate_sql_insertions`].
110*/
111pub trait FromSqlTuple<'input>: Sized {
112 fn from_sql_tuple(s: &'input [u8]) -> IResult<'input, Self>;
113}
114
115/**
116The entry point of the crate. Takes a SQL dump of a MediaWiki database table as bytes
117and yields an iterator over structs representing rows in the table.
118
119The return value is iterable as a mutable reference,
120and when iterated it yields structs representing the database rows (`Row`).
121These rows are represented as tuples in the SQL code.
122The tuples are parsed using [`FromSqlTuple::from_sql_tuple`]
123and the fields in the tuples are parsed by [`FromSql::from_sql`](from_sql::FromSql::from_sql).
124
125See the [example][crate#example] in the documentation, and see [`schemas`] for the full list of possible `Row`s.
126*/
127#[must_use = "the return type implements `Iterator` as a mutable reference, and does nothing unless consumed"]
128pub fn iterate_sql_insertions<'input, Row>(
129 sql: &'input [u8],
130) -> ParserIterator<&'input [u8], Error<'input>, impl FnMut(&'input [u8]) -> IResult<'input, Row>>
131where
132 Row: FromSqlTuple<'input> + 'input,
133{
134 let sql = &sql[sql.find("INSERT INTO").expect("INSERT INTO statement")..];
135 iterator(
136 sql,
137 preceded(
138 alt((
139 recognize(tuple((
140 opt(multispace0),
141 opt(tag(";")),
142 opt(multispace0),
143 tuple((
144 tag(B("INSERT INTO `")),
145 take_while(|b| (b'a'..=b'z').contains(&b) || b == b'_'),
146 tag(B("` VALUES ")),
147 )),
148 ))),
149 tag(","),
150 )),
151 FromSqlTuple::from_sql_tuple,
152 ),
153 )
154}