1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
/*!
[![crates.io](https://img.shields.io/crates/v/parse-mediawiki-sql.svg)](https://crates.io/crates/parse-mediawiki-sql)
[![docs.rs](https://img.shields.io/docsrs/parse-mediawiki-sql)](https://docs.rs/parse-mediawiki-sql)
`parse_mediawiki_sql` parses SQL dumps of a MediaWiki database.
The SQL dumps are scripts that create a database table and insert rows into it.
The entry point is [`iterate_sql_insertions`], which creates an iterable struct
from a byte slice (`&[u8]`). The struct is generic over the type returned by the iterator,
and this type must be one of the structs in the [`schemas`](schemas) module,
which represent rows in the database, such as [`Page`](schemas::Page).
## Usage
This crate is available from [crates.io](https://crates.io/crates/parse-mediawiki-sql) and can be
used by adding `parse-mediawiki-sql` to your dependencies in your project's `Cargo.toml`.
```toml
[dependencies]
parse-mediawiki-sql = "0.10"
```
If you're using Rust 2015, then you'll also need to add it to your crate root:
```no_run
extern crate parse_mediawiki_sql;
```
## Example
To generate a `Vec` containing the titles of all redirect pages:
```no_run
# #[cfg(feature = "utils")]
# fn main() -> Result<(), Box<dyn std::error::Error>> {
use parse_mediawiki_sql::{
iterate_sql_insertions,
schemas::Page,
field_types::{PageNamespace, PageTitle},
utils::memory_map,
};
use std::fs::File;
let page_sql = unsafe { memory_map("page.sql")? };
let redirects: Vec<(PageNamespace, PageTitle)> =
iterate_sql_insertions(&page_sql)
.filter_map(
|Page { namespace, title, is_redirect, .. }| {
if is_redirect {
Some((namespace, title))
} else {
None
}
},
)
.collect();
# Ok(())
# }
# #[cfg(not(feature = "utils"))]
# fn main() {}
```
Only a mutable reference to the struct is iterable, so a `for`-loop
must use `&mut` or `.into_iter()` to iterate over the struct:
```no_run
# #[cfg(feature = "utils")]
# fn main() -> Result<(), Box<dyn std::error::Error>> {
# use parse_mediawiki_sql::{
# iterate_sql_insertions,
# schemas::Page,
# utils::memory_map,
# };
# let page_sql =
# unsafe { memory_map("page.sql")? };
for Page { namespace, title, is_redirect, .. } in &mut iterate_sql_insertions(&page_sql) {
if is_redirect {
dbg!((namespace, title));
}
}
# Ok(())
# }
# #[cfg(not(feature = "utils"))]
# fn main() {}
```
*/
// Turn on the `doc_cfg` feature gate only when the `docsrs` cfg is set,
// so that the `doc(cfg(...))` annotation on `utils` below is accepted.
#![cfg_attr(docsrs, feature(doc_cfg))]
// `ByteSlice` supplies `find` on `&[u8]`; `B` turns a string literal into a byte slice.
use bstr::{ByteSlice, B};
// Streaming parser combinators used to build the row parser in `iterate_sql_insertions`.
use nom::{
branch::alt,
bytes::streaming::{tag, take_while},
character::streaming::multispace0,
combinator::{iterator, opt, recognize, ParserIterator},
sequence::{preceded, tuple},
};
// Public modules of the crate.
pub mod error;
pub mod field_types;
pub mod from_sql;
pub mod schemas;
// Re-exported so that the error and result types can be named from the crate root.
pub use error::Error;
pub use from_sql::IResult;
// `utils` is compiled only when the `utils` Cargo feature is enabled;
// when the `docsrs` cfg is set, the generated docs are annotated with that requirement.
#[cfg(feature = "utils")]
#[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
pub mod utils;
/**
Trait for converting a SQL tuple into a Rust type.
The resulting value may or may not borrow from the input bytes,
whose lifetime is `'input`.
Used by [`iterate_sql_insertions`] to parse each row.
*/
pub trait FromSqlTuple<'input>: Sized {
/// Parses a value of `Self` from the SQL tuple at the start of `s`.
///
/// Returns this crate's [`IResult`]; presumably, as with nom's `IResult`,
/// success carries the unparsed remainder of `s` together with the parsed value
/// (NOTE(review): confirm against the definition in `from_sql`).
fn from_sql_tuple(s: &'input [u8]) -> IResult<'input, Self>;
}
/**
Parses the rows out of a SQL dump of a MediaWiki database table.

This is the entry point of the crate. Given the raw bytes of a dump,
it returns a `ParserIterator` that is iterable as a mutable reference;
each item it yields is a `Row`, the struct representing one row of the table.

In the SQL script every row is written as a tuple following an `INSERT INTO` statement.
Each tuple is parsed with [`FromSqlTuple::from_sql_tuple`],
and the fields inside it with [`FromSql::from_sql`](from_sql::FromSql::from_sql).

See the [example][crate#example] in the crate documentation,
and [`schemas`] for the full list of types that can be used as `Row`.

# Panics

Panics if `sql` does not contain the text `INSERT INTO`.
*/
#[must_use = "the return type implements `Iterator` as a mutable reference, and does nothing unless consumed"]
pub fn iterate_sql_insertions<'input, Row>(
    sql: &'input [u8],
) -> ParserIterator<&'input [u8], Error<'input>, impl FnMut(&'input [u8]) -> IResult<'input, Row>>
where
    Row: FromSqlTuple<'input> + 'input,
{
    // Skip everything that precedes the first insertion statement.
    let first_insert = sql.find("INSERT INTO").expect("INSERT INTO statement");
    let insertions = &sql[first_insert..];

    // The fixed part of a statement: INSERT INTO `table_name` VALUES
    // where the table name consists of lowercase ASCII letters and underscores.
    let statement_header = tuple((
        tag(B("INSERT INTO `")),
        take_while(|byte: u8| byte == b'_' || byte.is_ascii_lowercase()),
        tag(B("` VALUES ")),
    ));

    // The start of a statement, optionally preceded by whitespace and
    // the `;` that terminates the previous statement.
    let statement_start = recognize(tuple((
        opt(multispace0),
        opt(tag(";")),
        opt(multispace0),
        statement_header,
    )));

    // Within a single statement, consecutive tuples are separated by a comma.
    let tuple_separator = tag(",");

    // A row is either the first tuple of a statement or a comma-separated follow-up tuple.
    let row_parser = preceded(
        alt((statement_start, tuple_separator)),
        FromSqlTuple::from_sql_tuple,
    );

    iterator(insertions, row_parser)
}