parse_mediawiki_sql/
utils.rs

/*!
Defines [`memory_map`] to read decompressed MediaWiki SQL files,
and [`NamespaceMap`] to display a page title prefixed by its namespace name.
*/
5
6use std::{
7    fs::File,
8    path::{Path, PathBuf},
9};
10
11use thiserror::Error;
12
13pub use memmap2::Mmap;
14
15/**
16Memory-maps a file, returning a useful message in case of error.
17
18Pass a borrowed memory map to [`iterate_sql_insertions`](crate::iterate_sql_insertions) so that the [schema](crate::schemas) struct
19produced by the iterator can borrow from the file's contents. See the [example](crate#example) in the crate documentation.
20
21# Errors
22In case of error, returns an [`struct@Error`] containing the action that failed, the path, and the underlying [`std::io::Error`].
23
24# Safety
25
26Inherits unsafe annotation from [`Mmap::map`].
27*/
28pub unsafe fn memory_map<P: AsRef<Path>>(path: P) -> Result<Mmap, Error> {
29    let path = path.as_ref();
30    Mmap::map(&File::open(path).map_err(|source| Error::from_io("open file", source, path))?)
31        .map_err(|source| Error::from_io("memory map file", source, path))
32}
33
34/// The error type used by [`memory_map`] and [`NamespaceMap`].
35#[derive(Debug, Error)]
36#[error("Failed to {action} at {}", path.canonicalize().as_ref().unwrap_or(path).display())]
37pub struct Error {
38    action: &'static str,
39    source: std::io::Error,
40    path: PathBuf,
41}
42
43impl Error {
44    pub fn from_io<P: Into<PathBuf>>(
45        action: &'static str,
46        source: std::io::Error,
47        path: P,
48    ) -> Self {
49        Error {
50            action,
51            source,
52            path: path.into(),
53        }
54    }
55}
56
57pub use mwtitle::{NamespaceMap, Title};
58
59pub trait NamespaceMapExt {
60    fn pretty_title(
61        &self,
62        namespace: crate::field_types::PageNamespace,
63        title: &crate::field_types::PageTitle,
64    ) -> String;
65}
66
67impl NamespaceMapExt for NamespaceMap {
68    fn pretty_title(
69        &self,
70        namespace: crate::field_types::PageNamespace,
71        title: &crate::field_types::PageTitle,
72    ) -> String {
73        self.to_pretty(&Title::new_unchecked(
74            namespace.into_inner(),
75            <&String>::from(title),
76        ))
77        .expect("invalid namespace ID")
78    }
79}