use crate::display::WhitespaceDisplayer;
use crate::site_info::{NamespaceAlias, NamespaceInfo, SiteInfo};
use crate::{Error, Result, Title, NS_MAIN};
use bytemuck::TransparentWrapper;
#[cfg(feature = "utils")]
#[cfg_attr(docs, doc(cfg(feature = "utils")))]
use flate2::read::GzDecoder;
use std::{collections::HashMap, iter::FusedIterator, sync::Arc};
#[cfg(feature = "utils")]
#[cfg_attr(docs, doc(cfg(feature = "utils")))]
use std::{io::Read, path::Path};
#[cfg(feature = "utils")]
#[cfg_attr(docs, doc(cfg(feature = "utils")))]
use crate::SiteInfoResponse;
/// Identifies a namespace either by numeric ID or by a name/alias string.
///
/// The type is a thin, borrowed selector, so it is cheap to copy around and
/// can be printed in diagnostics.
#[derive(Clone, Copy, Debug)]
pub enum Namespace<'a> {
    /// A numeric namespace ID.
    Id(i32),
    /// A borrowed namespace name or alias.
    NameOrAlias(&'a str),
}
impl<'a> From<&'a str> for Namespace<'a> {
fn from(name_or_alias: &'a str) -> Self {
Namespace::NameOrAlias(name_or_alias)
}
}
impl From<i32> for Namespace<'_> {
fn from(id: i32) -> Self {
Self::Id(id)
}
}
/// Owned namespace name or alias, used as a key in `NamespaceMap`.
///
/// Its `PartialEq`, `Ord` and `Hash` impls in this file all go through
/// `NamespaceStringBorrowed`, so comparisons use the normalized form
/// rather than the raw string.
#[derive(Clone, Debug)]
#[repr(transparent)]
pub(crate) struct NamespaceString(pub(crate) String);
// SAFETY: `NamespaceString` is `#[repr(transparent)]` and its only field is
// the wrapped `String`.
unsafe impl TransparentWrapper<String> for NamespaceString {}
impl NamespaceString {
    /// Borrows the owned string as a `NamespaceStringBorrowed`.
    fn as_namespace_str(&self) -> &NamespaceStringBorrowed {
        let raw: &str = &self.0;
        NamespaceStringBorrowed::from_str(raw)
    }
}
/// Equality is delegated to `NamespaceStringBorrowed`, i.e. it is decided on
/// the normalized characters rather than on the raw string.
// The impls take no lifetime parameter: `NamespaceString` owns its data, so
// the former `<'a>` was unused.
impl PartialEq for NamespaceString {
    fn eq(&self, other: &Self) -> bool {
        self.as_namespace_str() == other.as_namespace_str()
    }
}
impl Eq for NamespaceString {}
// Test fixture: each inner slice lists spellings of one namespace name that
// the tests in this file expect to compare equal and hash identically
// (differing only in letter case and in space vs. underscore).
#[cfg(test)]
const NAMESPACE_STRING_TESTS: [&[&str]; 5] = [
// ASCII: case and space/underscore variants.
&[
"User talk",
"User_talk",
"user talk",
"user_talk",
"User Talk",
"User_Talk",
"USER TALK",
"USER_TALK",
],
// Latin with a diacritic.
&["Catégorie", "CATÉGORIE"],
// Greek.
&["Συζήτηση χρήστη", "συζήτηση χρήστη", "ΣΥΖΉΤΗΣΗ ΧΡΉΣΤΗ"],
// Cyrillic.
&[
"Обсуждение Викисловаря",
"обсуждение викисловаря",
"ОБСУЖДЕНИЕ ВИКИСЛОВАРЯ",
],
// Armenian.
&[
"Մասնակցի քննարկում",
"մասնակցի քննարկում",
"ՄԱՍՆԱԿՑԻ ՔՆՆԱՐԿՈՒՄ",
],
];
/// Calls `f` with every ordered pair of spellings (including a spelling
/// paired with itself) taken from the same group of
/// `NAMESPACE_STRING_TESTS`.
#[cfg(test)]
fn for_each_namespace_string_combination(f: impl Fn(&str, &str)) {
    NAMESPACE_STRING_TESTS.iter().for_each(|group| {
        group.iter().for_each(|&first| {
            group.iter().for_each(|&second| f(first, second));
        });
    });
}
/// Every pair of spellings from the same fixture group must be equal and
/// hash to the same value when wrapped in `NamespaceString`.
#[test]
fn hash_and_eq_for_namespace_string_are_case_and_whitespace_insensitive() {
    for_each_namespace_string_combination(|a, b| {
        let left = NamespaceString(a.to_owned());
        let right = NamespaceString(b.to_owned());
        assert_eq!(left, right);
        assert_eq!(hash(left), hash(right));
    });
}
impl std::borrow::Borrow<NamespaceStringBorrowed> for NamespaceString {
fn borrow(&self) -> &NamespaceStringBorrowed {
self.as_namespace_str()
}
}
/// Partial ordering is the total ordering from `Ord`, so the two impls
/// cannot disagree.
// No lifetime parameter: the former `<'a>` was unused.
impl PartialOrd for NamespaceString {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
/// Total ordering, delegated to the `Ord` impl of `NamespaceStringBorrowed`.
// Uses `cmp` directly instead of `partial_cmp(..).unwrap()`: the order is
// total, so there is no `None` case to unwrap. The former `<'a>` was unused.
impl Ord for NamespaceString {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.as_namespace_str().cmp(other.as_namespace_str())
    }
}
/// Hashing is delegated to `NamespaceStringBorrowed` so that it stays
/// consistent with `Eq` and with the `Borrow` impl used for map lookups.
// No lifetime parameter: the former `<'a>` was unused.
impl std::hash::Hash for NamespaceString {
    fn hash<H>(&self, hasher: &mut H)
    where
        H: std::hash::Hasher,
    {
        self.as_namespace_str().hash(hasher);
    }
}
/// Test helper: feeds `v` into a fresh `DefaultHasher` and returns the
/// resulting 64-bit hash.
#[cfg(test)]
fn hash(v: impl std::hash::Hash) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::Hasher;

    let mut state = DefaultHasher::new();
    v.hash(&mut state);
    state.finish()
}
impl std::convert::From<&str> for NamespaceString {
fn from(s: &str) -> Self {
NamespaceString(s.into())
}
}
/// Borrowed (unsized) counterpart of `NamespaceString`, wrapping a `str`.
///
/// Its `PartialEq`, `Ord` and `Hash` impls in this file all operate on the
/// output of `chars_normalized` rather than on the raw characters.
#[derive(Debug)]
#[repr(transparent)]
pub(crate) struct NamespaceStringBorrowed(str);
// SAFETY: `NamespaceStringBorrowed` is `#[repr(transparent)]` and its only
// field is the wrapped `str`.
unsafe impl TransparentWrapper<str> for NamespaceStringBorrowed {}
impl NamespaceStringBorrowed {
    /// Reinterprets a string slice as a `NamespaceStringBorrowed` reference.
    pub fn from_str(s: &str) -> &Self {
        Self::wrap_ref(s)
    }

    /// Iterates over the normalized characters of the name: `'_'` and `' '`
    /// both become `'_'`, and every other character is replaced by its
    /// lowercase expansion (which may be more than one `char`).
    fn chars_normalized(
        &self,
    ) -> impl Iterator<Item = char> + std::iter::FusedIterator + '_ {
        /// Normalized output produced for a single input character.
        enum Normalized {
            /// A separator, yielded once.
            Separator(Option<char>),
            /// The lowercase expansion of a non-separator character.
            Lowercased(std::char::ToLowercase),
        }

        impl Iterator for Normalized {
            type Item = char;

            fn next(&mut self) -> Option<char> {
                match self {
                    Self::Separator(pending) => pending.take(),
                    Self::Lowercased(lower) => lower.next(),
                }
            }
        }

        impl FusedIterator for Normalized {}

        self.0.chars().flat_map(|c| match c {
            '_' | ' ' => Normalized::Separator(Some('_')),
            other => Normalized::Lowercased(other.to_lowercase()),
        })
    }
}
/// Two names are equal when their normalized character sequences are equal.
impl PartialEq for NamespaceStringBorrowed {
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.chars_normalized(), other.chars_normalized());
        Iterator::eq(lhs, rhs)
    }
}
impl Eq for NamespaceStringBorrowed {}
/// Every pair of spellings from the same fixture group must be equal and
/// hash to the same value when viewed as `NamespaceStringBorrowed`.
#[test]
fn hash_and_eq_for_namespace_string_borrowed_are_case_and_whitespace_insensitive() {
    for_each_namespace_string_combination(|a, b| {
        let left = NamespaceStringBorrowed::from_str(a);
        let right = NamespaceStringBorrowed::from_str(b);
        assert_eq!(left, right);
        assert_eq!(hash(&left), hash(&right));
    });
}
/// Partial ordering is the total ordering from `Ord`, so the two impls
/// cannot disagree.
impl PartialOrd for NamespaceStringBorrowed {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
/// Names are ordered lexicographically by their normalized characters.
// `Ord` holds the actual comparison and `PartialOrd` wraps it; previously
// `cmp` unwrapped the result of `partial_cmp`.
impl Ord for NamespaceStringBorrowed {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.chars_normalized().cmp(other.chars_normalized())
    }
}
/// Hashes the normalized characters one by one, so names that compare
/// equal also hash equally.
impl std::hash::Hash for NamespaceStringBorrowed {
    fn hash<H>(&self, hasher: &mut H)
    where
        H: std::hash::Hasher,
    {
        self.chars_normalized().for_each(|c| c.hash(hasher));
    }
}
/// Reinterprets a string slice as a `&NamespaceStringBorrowed` with the
/// same lifetime.
impl<'a> std::convert::From<&'a str> for &'a NamespaceStringBorrowed {
    fn from(s: &'a str) -> Self {
        NamespaceStringBorrowed::wrap_ref(s)
    }
}
/// Lookup table from namespace IDs, names and aliases to `NamespaceInfo`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct NamespaceMap {
// Namespace info keyed by numeric namespace ID.
namespaces_by_id: HashMap<i32, Arc<NamespaceInfo>>,
// The same `Arc`s keyed by local name, canonical name and alias. Keys are
// `NamespaceString`s, so lookups use that type's normalized comparison.
namespaces_by_name_or_alias: HashMap<NamespaceString, Arc<NamespaceInfo>>,
}
impl NamespaceMap {
/// Builds a map from the `namespaces` and `namespace_aliases` of a
/// `SiteInfo`.
///
/// # Errors
///
/// Propagates the error of
/// [`from_namespaces_and_namespace_aliases`](Self::from_namespaces_and_namespace_aliases).
pub fn from_site_info(site_info: SiteInfo) -> Result<Self> {
    let namespaces = site_info.namespaces.into_values();
    let aliases = site_info.namespace_aliases.into_iter();
    Self::from_namespaces_and_namespace_aliases(namespaces, aliases)
}
/// Reads site info JSON from `path` and builds a map from it.
///
/// A file whose extension is `gz` is decompressed with `GzDecoder` first;
/// any other file is read as plain text.
///
/// # Errors
///
/// Returns an error when the file cannot be opened, read or decompressed,
/// or when the JSON cannot be converted into a map.
#[cfg(feature = "utils")]
#[cfg_attr(docs, doc(cfg(feature = "utils")))]
pub fn from_path(path: &Path) -> Result<Self> {
    let is_gzip = path.extension() == Some("gz".as_ref());
    let json = if is_gzip {
        let file = std::fs::File::open(path)
            .map_err(|source| Error::from_io("open file", source, path))?;
        let mut contents = String::new();
        GzDecoder::new(file)
            .read_to_string(&mut contents)
            .map_err(|source| Error::from_io("parse GZip", source, path))?;
        contents
    } else {
        std::fs::read_to_string(path)
            .map_err(|source| Error::from_io("read file to string", source, path))?
    };
    Self::from_json_with_path(&json, Some(path))
}
pub fn from_namespaces_and_namespace_aliases<
NS: IntoIterator<Item = NamespaceInfo>,
AL: IntoIterator<Item = NamespaceAlias>,
>(
namespaces: NS,
namespace_aliases: AL,
) -> Result<Self> {
let mut namespaces_by_id = HashMap::new();
let mut namespaces_by_name_or_alias = HashMap::new();
for namespace in namespaces {
let namespace = Arc::new(namespace);
namespaces_by_id.insert(namespace.id, namespace.clone());
namespaces_by_name_or_alias.insert(
NamespaceString(namespace.name.clone()),
namespace.clone(),
);
if let Some(canonical) = namespace.canonical.as_deref() {
namespaces_by_name_or_alias.insert(
NamespaceString(canonical.to_string()),
namespace.clone(),
);
}
}
let mut aliases_not_found = Vec::new();
for alias in namespace_aliases {
if let Some(namespace_info) = namespaces_by_id.get(&alias.id) {
namespaces_by_name_or_alias.insert(
NamespaceString(alias.alias),
namespace_info.clone(),
);
} else {
aliases_not_found.push(alias);
}
}
if aliases_not_found.is_empty() {
Ok(Self {
namespaces_by_id,
namespaces_by_name_or_alias,
})
} else {
Err(Error::UnknownAliases(aliases_not_found))
}
}
/// Builds a map from plain iterators instead of `NamespaceInfo` and
/// `NamespaceAlias` values.
///
/// Each item of `namespaces` is an iterator of `(String, String)` pairs
/// that is converted with `NamespaceInfo::try_from_iter`; each item of
/// `namespace_aliases` is an `(alias, id)` pair.
///
/// # Errors
///
/// Returns the first error from `NamespaceInfo::try_from_iter`, or the
/// error of
/// [`from_namespaces_and_namespace_aliases`](Self::from_namespaces_and_namespace_aliases).
pub fn from_iters<
    NS: IntoIterator<Item = NI>,
    NI: IntoIterator<Item = (String, String)>,
    AL: IntoIterator<Item = (String, i32)>,
>(
    namespaces: NS,
    namespace_aliases: AL,
) -> Result<Self> {
    let mut infos = Vec::new();
    for fields in namespaces {
        infos.push(NamespaceInfo::try_from_iter(fields)?);
    }
    let aliases = namespace_aliases
        .into_iter()
        .map(|(alias, id)| NamespaceAlias { id, alias });
    Self::from_namespaces_and_namespace_aliases(infos, aliases)
}
/// Deserializes a `SiteInfoResponse` as JSON from `reader` and builds a
/// map from its `query` field.
///
/// # Errors
///
/// Returns `Error::Json` when deserialization fails, or the error of
/// [`from_site_info`](Self::from_site_info).
#[cfg(feature = "utils")]
#[cfg_attr(docs, doc(cfg(feature = "utils")))]
pub fn from_reader<R: Read>(reader: R) -> Result<Self> {
    let response: SiteInfoResponse =
        serde_json::from_reader(reader).map_err(|source| Error::Json {
            source: Arc::new(source),
        })?;
    Self::from_site_info(response.query)
}
/// Parses a `SiteInfoResponse` from a JSON string and builds a map from it.
///
/// # Errors
///
/// Returns `Error::Json` when parsing fails, or the error of
/// [`from_site_info`](Self::from_site_info).
#[cfg(feature = "utils")]
#[cfg_attr(docs, doc(cfg(feature = "utils")))]
pub fn from_json<S: AsRef<str>>(json: S) -> Result<Self> {
    let text: &str = json.as_ref();
    Self::from_json_with_path(text, None)
}
/// Parses a `SiteInfoResponse` from `json` and builds a map from its
/// `query` field.
///
/// `path` is used only for error reporting: a parse failure becomes
/// `Error::JsonFile` when a path is given and `Error::Json` otherwise.
#[cfg(feature = "utils")]
#[cfg_attr(docs, doc(cfg(feature = "utils")))]
fn from_json_with_path(json: &str, path: Option<&Path>) -> Result<Self> {
    let parsed = serde_json::from_str::<SiteInfoResponse>(json);
    let response = parsed.map_err(|source| {
        let source = Arc::new(source);
        match path {
            Some(path) => Error::JsonFile {
                source,
                path: path.into(),
            },
            None => Error::Json { source },
        }
    })?;
    Self::from_site_info(response.query)
}
/// Returns the info of the namespace with the given ID, if it is in the map.
pub fn get_by_id(&self, id: i32) -> Option<&NamespaceInfo> {
    let info = self.namespaces_by_id.get(&id)?;
    Some(info.as_ref())
}
/// Returns the info of the namespace registered under `name_or_alias`.
///
/// Accepts any key type that `NamespaceString` can be borrowed as.
fn get_by_name_or_alias<S>(
    &self,
    name_or_alias: &S,
) -> Option<&NamespaceInfo>
where
    S: ?Sized,
    NamespaceString: std::borrow::Borrow<S>,
    S: std::hash::Hash + Eq,
{
    let info = self.namespaces_by_name_or_alias.get(name_or_alias)?;
    Some(info.as_ref())
}
/// Looks up a namespace by ID or by name/alias and returns its info.
///
/// Returns `None` when the map has no matching namespace.
pub fn get_info<'a, 'b, N: Into<Namespace<'b>>>(
    &'a self,
    namespace: N,
) -> Option<&NamespaceInfo> {
    let selector: Namespace<'b> = namespace.into();
    match selector {
        Namespace::NameOrAlias(name_or_alias) => {
            let key = NamespaceStringBorrowed::from_str(name_or_alias);
            self.get_by_name_or_alias(key)
        }
        Namespace::Id(id) => self.get_by_id(id),
    }
}
/// Returns the `id` of the matching namespace, or `None` if the map has no
/// such namespace.
pub fn get_id<'a, 'b, N: Into<Namespace<'b>>>(
    &'a self,
    namespace: N,
) -> Option<i32> {
    let info = self.get_info(namespace)?;
    Some(info.id)
}
/// Returns the `name` of the matching namespace, or `None` if the map has
/// no such namespace.
pub fn get_name<'a, 'b, N: Into<Namespace<'b>>>(
    &'a self,
    namespace: N,
) -> Option<&'a str> {
    let info = self.get_info(namespace)?;
    Some(&*info.name)
}
/// Returns the `case` setting of the matching namespace, or `None` if the
/// map has no such namespace.
pub fn get_case<'a, 'b, N: Into<Namespace<'b>>>(
    &'a self,
    namespace: N,
) -> Option<&'a str> {
    let info = self.get_info(namespace)?;
    Some(&*info.case)
}
/// Returns the `canonical` name of the matching namespace.
///
/// Returns `None` when the map has no such namespace or when the
/// namespace has no canonical name.
pub fn get_canonical_name<'a, 'b, N: Into<Namespace<'b>>>(
    &'a self,
    namespace: N,
) -> Option<&'a str> {
    let info = self.get_info(namespace)?;
    info.canonical.as_deref()
}
/// Reports whether the matching namespace's `case` is `"first-letter"`.
///
/// Returns `None` when the map has no such namespace.
pub fn is_capitalized<'a, 'b, N: Into<Namespace<'b>>>(
    &'a self,
    namespace: N,
) -> Option<bool> {
    let info = self.get_info(namespace)?;
    let case: &str = &*info.case;
    Some(case == "first-letter")
}
/// Formats `title` with its interwiki and namespace prefixes, using `' '`
/// as the `WhitespaceDisplayer` character and omitting the fragment.
///
/// Returns `None` when the title is outside the main namespace and its
/// namespace is not in this map.
pub fn to_pretty(&self, title: &Title) -> Option<String> {
    const INCLUDE_FRAGMENT: bool = false;
    self.prefixed::<' '>(title, INCLUDE_FRAGMENT)
}
/// Formats `title` with its interwiki and namespace prefixes, using `'_'`
/// as the `WhitespaceDisplayer` character and omitting the fragment.
///
/// Returns `None` when the title is outside the main namespace and its
/// namespace is not in this map.
pub fn to_underscores(&self, title: &Title) -> Option<String> {
    const INCLUDE_FRAGMENT: bool = false;
    self.prefixed::<'_'>(title, INCLUDE_FRAGMENT)
}
/// Formats `title` with its interwiki and namespace prefixes, using `' '`
/// as the `WhitespaceDisplayer` character, and appends `#` plus the
/// fragment when the title has one.
///
/// Returns `None` when the title is outside the main namespace and its
/// namespace is not in this map.
pub fn to_pretty_with_fragment(&self, title: &Title) -> Option<String> {
    const INCLUDE_FRAGMENT: bool = true;
    self.prefixed::<' '>(title, INCLUDE_FRAGMENT)
}
/// Shared implementation of the title formatters.
///
/// Concatenates, in order: the interwiki prefix and `:` (if any), the
/// namespace name and `:` (if the name is non-empty), the title's dbkey,
/// and, when `include_fragment` is set and a fragment exists, `#` and the
/// fragment. The interwiki, namespace name and dbkey are written through
/// `WhitespaceDisplayer::<C>`; the fragment is written as-is.
///
/// Returns `None` when the title is outside the main namespace and its
/// namespace is not in this map.
fn prefixed<const C: char>(
    &self,
    title: &Title,
    include_fragment: bool,
) -> Option<String> {
    let (interwiki, iw_colon) =
        title.interwiki().map_or(("", ""), |iw| (iw, ":"));

    // The main namespace has no prefix and is never looked up in the map.
    let prefix = if title.namespace() != NS_MAIN {
        self.get_name(title.namespace())?
    } else {
        ""
    };
    let colon = match prefix {
        "" => "",
        _ => ":",
    };

    let (hash, fragment) = match title.fragment.as_deref() {
        Some(fragment) if include_fragment => ("#", fragment),
        _ => ("", ""),
    };

    Some(format!(
        "{}{}{}{}{}{}{}",
        WhitespaceDisplayer::<C>(interwiki),
        iw_colon,
        WhitespaceDisplayer::<C>(prefix),
        colon,
        WhitespaceDisplayer::<C>(title.dbkey()),
        hash,
        fragment
    ))
}
}
// Table-driven test. Each case supplies:
//   1. input namespaces `(id, name, canonical, case)` and aliases `(alias, id)`;
//   2. the expected contents of the by-ID and by-name maps;
//   3. a closure with extra lookups to run against the constructed map.
// The map is built with `from_namespaces_and_namespace_aliases`, compared
// with the expected value, then handed to the closure.
#[test]
fn siteinfo_can_be_converted_to_namespace_map_and_lookup_is_case_insensitive() {
use std::collections::HashMap;
for (
(namespaces, aliases),
(expected_id_map, expected_name_map),
run_tests,
) in [(
// Input: namespaces and aliases.
(
[
(0, "", None, "first-letter"),
(1, "Talk", Some("Talk"), "first-letter"),
(10, "Template", Some("Template"), "first-letter"),
(14, "Category", Some("Category"), "first-letter"),
(15, "Category talk", Some("Category talk"), "first-letter"),
],
[("CAT", 14)],
),
// Expected: by-ID entries, then by-name entries (including the alias).
(
[
(0, ("", None, "first-letter")),
(1, ("Talk", Some("Talk"), "first-letter")),
(10, ("Template", Some("Template"), "first-letter")),
(14, ("Category", Some("Category"), "first-letter")),
(15, ("Category talk", Some("Category talk"), "first-letter")),
],
[
("", (0, "", None, "first-letter")),
("Talk", (1, "Talk", Some("Talk"), "first-letter")),
(
"Template",
(10, "Template", Some("Template"), "first-letter"),
),
(
"Category",
(14, "Category", Some("Category"), "first-letter"),
),
("CAT", (14, "Category", Some("Category"), "first-letter")),
(
"Category talk",
(
15,
"Category talk",
Some("Category talk"),
"first-letter",
),
),
],
),
// Extra lookups: by ID, and by name in several case/separator spellings.
|namespace_map: NamespaceMap| {
assert_eq!(namespace_map.get_name(1), Some("Talk"));
assert_eq!(namespace_map.get_name(14), Some("Category"));
for (names, expected) in [
(&["Talk", "talk", "TALK"][..], 1),
(
&[
"Category talk",
"Category_talk",
"CATEGORY TALK",
"CATEGORY_TALK",
],
15,
),
] {
for name in names {
assert_eq!(
namespace_map.get_id(*name),
Some(expected),
"\n{}",
name
);
}
}
},
)] {
// Turn the input tuples into `NamespaceInfo` / `NamespaceAlias` values.
let namespaces =
Vec::from_iter(namespaces.map(|(id, name, canonical, case)| {
NamespaceInfo {
id,
name: name.into(),
canonical: canonical.map(String::from),
case: case.into(),
}
}));
let namespacealiases =
Vec::from(aliases.map(|(alias, id)| NamespaceAlias {
alias: alias.into(),
id,
}));
// Build the expected map directly from the expected tuples.
let expected = Ok(NamespaceMap {
namespaces_by_id: HashMap::from_iter(expected_id_map.map(
|(id, (name, canonical, case))| {
(
id,
Arc::new(NamespaceInfo {
id,
name: name.into(),
canonical: canonical.map(String::from),
case: case.into(),
}),
)
},
)),
namespaces_by_name_or_alias: HashMap::from_iter(
expected_name_map.map(
|(name_or_alias, (id, name, canonical, case))| {
(
name_or_alias.into(),
Arc::new(NamespaceInfo {
id,
name: name.into(),
canonical: canonical.map(String::from),
case: case.into(),
}),
)
},
),
),
});
// Reduce the error to `Option<Vec<NamespaceAlias>>` so that the result
// can be compared with `expected` via `assert_eq!`.
let namespace_map =
NamespaceMap::from_namespaces_and_namespace_aliases(
namespaces.clone(),
namespacealiases.clone(),
)
.map_err(|e| {
if let Error::UnknownAliases(aliases) = e {
Some(aliases)
} else {
None
}
});
assert_eq!(
namespace_map, expected,
"\nconverting {:?}\n{:?}",
&namespaces, &namespacealiases
);
run_tests(namespace_map.unwrap());
}
}