1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
/*!
Defines [`memory_map`] to memory-map decompressed MediaWiki SQL files,
and re-exports [`NamespaceMap`], extended by [`NamespaceMapExt`],
to display a page title prefixed by its namespace name.
*/

use std::{
    fs::File,
    path::{Path, PathBuf},
};

use thiserror::Error;

pub use memmap2::Mmap;

/**
Memory-maps a file, returning a useful message in case of error.

Pass a borrowed memory map to [`iterate_sql_insertions`](crate::iterate_sql_insertions) so that the [schema](crate::schemas) struct
produced by the iterator can borrow from the file's contents. See the [example](crate#example) in the crate documentation.

# Errors
In case of error, returns an [`struct@Error`] containing the action that failed, the path, and the underlying [`std::io::Error`].

# Safety

Inherits unsafe annotation from [`Mmap::map`].
*/
pub unsafe fn memory_map<P: AsRef<Path>>(path: P) -> Result<Mmap, Error> {
    let path = path.as_ref();
    Mmap::map(&File::open(path).map_err(|source| Error::from_io("open file", source, path))?)
        .map_err(|source| Error::from_io("memory map file", source, path))
}

/// The error type used by [`memory_map`] and [`NamespaceMap`].
///
/// Values are built with `Error::from_io`. The `Display` output has the form
/// `Failed to {action} at {path}`: the path is shown in canonicalized form when
/// canonicalizing it succeeds, and exactly as stored otherwise. Note that formatting
/// the error therefore queries the file system.
// NOTE(review): in this file `NamespaceMap` is re-exported from `mwtitle`, and no visible
// code ties it to this error type — confirm whether the mention above is still accurate.
#[derive(Debug, Error)]
#[error("Failed to {action} at {}", path.canonicalize().as_ref().unwrap_or(path).display())]
pub struct Error {
    /// Verb phrase naming the operation that failed (e.g. `"open file"`); inserted after
    /// `Failed to` in the message.
    action: &'static str,
    /// The underlying I/O error. `thiserror` treats a field named `source` as the
    /// error's source.
    source: std::io::Error,
    /// Path of the file the failed operation was applied to.
    path: PathBuf,
}

impl Error {
    pub fn from_io<P: Into<PathBuf>>(
        action: &'static str,
        source: std::io::Error,
        path: P,
    ) -> Self {
        Error {
            action,
            source,
            path: path.into(),
        }
    }
}

pub use mwtitle::{NamespaceMap, Title};

/// Extension trait that adds [`pretty_title`](NamespaceMapExt::pretty_title), used to
/// display a page title prefixed by its namespace name.
pub trait NamespaceMapExt {
    /// Returns a display string for the page identified by `namespace` and `title`.
    ///
    /// # Panics
    /// The implementation for [`NamespaceMap`] panics with `invalid namespace ID` when
    /// the underlying `to_pretty` call does not produce a title.
    fn pretty_title(
        &self,
        namespace: crate::field_types::PageNamespace,
        title: &crate::field_types::PageTitle,
    ) -> String;
}

impl NamespaceMapExt for NamespaceMap {
    /// Formats `title` in `namespace` via `NamespaceMap::to_pretty`.
    ///
    /// The pair is wrapped in a [`Title`] without validation (`Title::new_unchecked`),
    /// so the inputs are trusted to be well-formed.
    ///
    /// # Panics
    /// Panics with `invalid namespace ID` if `to_pretty` does not yield a title
    /// (presumably because this map has no entry for `namespace` — confirm against `mwtitle`).
    fn pretty_title(
        &self,
        namespace: crate::field_types::PageNamespace,
        title: &crate::field_types::PageTitle,
    ) -> String {
        // Unwrap the crate's field types into the raw values `mwtitle` expects.
        let namespace_id = namespace.into_inner();
        let dbkey = <&String>::from(title);

        let unchecked = Title::new_unchecked(namespace_id, dbkey);
        self.to_pretty(&unchecked).expect("invalid namespace ID")
    }
}