mwtitle 0.2.0-alpha.2

MediaWiki title validation and formatting
Documentation
/*
Copyright (C) Tim Starling
Copyright (C) Daniel Kinzler
Copyright (C) 2021 Kunal Mehta <legoktm@debian.org>
Copyright (C) 2021 Erutuon

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
//! mwtitle
//! =======
//!
//! `mwtitle` is a library for parsing, normalizing and formatting MediaWiki
//! page titles. It is primarily a port of the MediaWikiTitleCodec class
//! from MediaWiki, and passes the MediaWiki test suite.
//!
//! The easiest way to get started is to create a [`TitleCodec`] from a [siteinfo](https://www.mediawiki.org/wiki/API:Siteinfo)
//! API request.
//! ```
//! # #[tokio::main]
//! # async fn main() -> anyhow::Result<()> {
//! # #[cfg(feature = "parsing")]
//! # {
//! # use mwtitle::{SiteInfoResponse, TitleCodec};
//! let url = "https://en.wikipedia.org/w/api.php\
//!            ?action=query&meta=siteinfo\
//!            &siprop=general|namespaces|namespacealiases|interwikimap\
//!            &formatversion=2&format=json";
//! let resp: SiteInfoResponse = reqwest::get(url).await?.json().await?;
//! let codec = TitleCodec::from_site_info(resp.query)?;
//! let title = codec.new_title("Talk:Main Page#Section 1")?;
//! assert_eq!(title.namespace(), 1);
//! assert_eq!(title.dbkey(), "Main_Page");
//! assert_eq!(title.fragment(), Some("Section 1"));
//! assert_eq!(codec.to_pretty(&title), "Talk:Main Page".to_string());
//! assert_eq!(
//!     codec.to_pretty_with_fragment(&title),
//!     "Talk:Main Page#Section 1".to_string()
//! );
//! # }
//! # Ok(())
//! # }
//! ```
//!
//! It's also possible to create a `TitleCodec` from a JSON
//! `siteinfo-namespaces.json` or compressed `siteinfo-namespaces.json.gz`
//! that comes from Wikimedia dumps. This requires the extra `utils` feature
//! to be enabled.
//!
//! ## Contributing
//! `mwtitle` is a part of the [`mwbot-rs` project](https://www.mediawiki.org/wiki/Mwbot-rs).
//! We're always looking for new contributors; please [reach out](https://www.mediawiki.org/wiki/Mwbot-rs#Contributing)
//! if you're interested!
#![deny(clippy::all)]
#![cfg_attr(docs, feature(doc_cfg))]

#[cfg(feature = "parsing")]
#[cfg_attr(docs, doc(cfg(feature = "parsing")))]
mod codec;
mod display;
mod error;
mod interwiki_set;
#[cfg(feature = "parsing")]
#[cfg_attr(docs, doc(cfg(feature = "parsing")))]
mod ip;
#[cfg(feature = "parsing")]
#[cfg_attr(docs, doc(cfg(feature = "parsing")))]
mod ipv6;
mod namespace_map;
#[cfg(feature = "parsing")]
#[cfg_attr(docs, doc(cfg(feature = "parsing")))]
mod php;
mod site_info;

#[cfg(feature = "parsing")]
pub use codec::TitleCodec;
pub use display::TitleWhitespace;
pub use error::Error;
pub use interwiki_set::InterwikiSet;
pub use namespace_map::NamespaceMap;
pub use site_info::{
    Interwiki, NamespaceAlias, NamespaceInfo, Response as SiteInfoResponse,
    SiteInfo,
};
/// Convenience alias for [`std::result::Result`] whose error type defaults to
/// this crate's [`Error`]. A different error type can still be supplied
/// explicitly through the second type parameter.
pub type Result<T, E = Error> = std::result::Result<T, E>;

/// Namespace ID of the main namespace.
// NOTE(review): not referenced in this part of the file; presumably used by
// sibling modules — confirm before removing.
const NS_MAIN: i32 = 0;
/// Namespace ID that [`Title::is_file`] compares against.
const NS_FILE: i32 = 6;
/// Namespace ID that [`Title::is_category`] compares against.
const NS_CATEGORY: i32 = 14;

/// Represents a MediaWiki title. A title can be broken down into the following
/// attributes: `[[interwiki:ns:db_key#fragment]]`.
/// * `interwiki`: Optional prefix pointing to another site
/// * `namespace`: Numerical ID corresponding to a MediaWiki namespace
/// * `dbkey`: Page name, with underscores instead of spaces
/// * `fragment`: Optional anchor for a specific section
///
/// Equality (`Eq`/`PartialEq`) is derived and therefore compares every field,
/// including the fragment and the local-interwiki flag.
///
/// ```
/// # use mwtitle::Title;
/// // ns1 is Talk, so this is [[Talk:Main Page]]
/// let title = unsafe { Title::new_unchecked(1, "Main_Page".into()) };
/// assert_eq!(title.namespace(), 1);
/// assert_eq!(title.dbkey(), "Main_Page");
/// assert!(title.interwiki().is_none());
/// assert!(title.fragment().is_none());
/// let title = title.with_fragment("Section 1".into());
/// assert_eq!(title.fragment(), Some("Section 1"));
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Title {
    /// Numerical namespace ID.
    namespace: i32,
    /// Page name without the namespace prefix, with underscores instead of spaces.
    dbkey: String,
    /// Optional anchor for a specific section.
    fragment: Option<String>,
    /// Optional prefix pointing to another site.
    interwiki: Option<String>,
    /// Whether this title was created via a local interwiki link.
    local_interwiki: bool,
}

impl Title {
    #[inline]
    /// Reorders fields into a reasonable order for `PartialOrd` and `Ord` implementations.
    /// Negates `local_interwiki` to make local interwikis sort first.
    /// The desired order with regard to interwikis:
    /// titles without interwikis, titles with local interwikis, titles with other interwikis
    fn to_sortable(&self) -> impl Ord + '_ {
        let Title {
            namespace,
            dbkey,
            fragment,
            interwiki,
            local_interwiki,
        } = self;
        (
            interwiki.is_some(),
            !local_interwiki,
            interwiki.as_deref(),
            *namespace,
            dbkey,
            fragment.as_deref(),
        )
    }
}

impl PartialOrd for Title {
    /// Compares two titles; always returns `Some` because `Title` has a total order.
    ///
    /// Delegates to [`Ord::cmp`] so that the partial and total orderings can
    /// never diverge (the canonical form for types that implement `Ord`).
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Title {
    /// Total order over titles, defined by comparing the keys produced by
    /// `Title::to_sortable` for both operands.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        let lhs = self.to_sortable();
        let rhs = other.to_sortable();
        lhs.cmp(&rhs)
    }
}

// Test-only shorthand for building a `Title` literal without going through a codec.
//
// Accepted forms (each part must be a literal, parts separated by `:`):
//   title!(local_interwiki : "interwiki" : namespace : "dbkey")
//   title!("interwiki" : namespace : "dbkey")
//   title!(namespace : "dbkey")
//
// The fragment is always left at its default (`None`); when `local_interwiki`
// is not given it is left at its default (`false`).
#[cfg(test)]
macro_rules! title {
    // Four parts: explicit `local_interwiki` flag plus an interwiki prefix.
    (
        $local_interwiki:literal : $interwiki:literal : $namespace:literal : $dbkey:literal
    ) => {{
        Title {
            local_interwiki: $local_interwiki,
            interwiki: Some($interwiki.into()),
            namespace: $namespace,
            dbkey: $dbkey.into(),
            fragment: Default::default(),
        }
    }};
    // Three parts: interwiki prefix whose `local_interwiki` flag stays at the default.
    (
        $interwiki:literal : $namespace:literal : $dbkey:literal
    ) => {{
        Title {
            interwiki: Some($interwiki.into()),
            namespace: $namespace,
            dbkey: $dbkey.into(),
            local_interwiki: Default::default(),
            fragment: Default::default(),
        }
    }};
    // Two parts: a title without any interwiki prefix.
    (
        $namespace:literal : $dbkey:literal
    ) => {{
        Title {
            interwiki: None,
            namespace: $namespace,
            dbkey: $dbkey.into(),
            local_interwiki: Default::default(),
            fragment: Default::default(),
        }
    }};
}

/// Sorting must yield titles without interwikis first (ordered by namespace),
/// then titles with local interwikis, then titles with other interwikis.
#[test]
fn title_ord() {
    let expected = [
        title!(0:"Title"),
        title!(4:"Title"),
        title!(true:"localinterwiki1":4:"Title"),
        title!(true:"localinterwiki2":4:"Title"),
        title!("interwiki1":4:"Title"),
        title!("interwiki2":4:"Title"),
    ];

    // The input starts out in exactly the reverse of the expected order.
    let mut shuffled = vec![
        title!(true:"localinterwiki2":4:"Title"),
        title!(true:"localinterwiki1":4:"Title"),
        title!("interwiki2":4:"Title"),
        title!("interwiki1":4:"Title"),
        title!(4:"Title"),
        title!(0:"Title"),
    ];
    shuffled.sort();

    assert_eq!(shuffled, expected);
}

impl Title {
    /// Create a new `Title` from a namespace ID
    /// and database key (title without the namespace prefix),
    /// with no validation on the namespace or text parts.
    ///
    /// Good if you're getting the title from a
    /// trusted place like the API.
    ///
    /// The `dbkey` should have underscores
    /// and be normalized and sanitized
    /// as if it has been processed by [`TitleCodec::new_title`].
    /// The namespace must exist in the [`TitleCodec`] or [`NamespaceMap`]
    /// that will format this title.
    ///
    /// The returned title has no fragment and no interwiki prefix, and is not
    /// marked as a local interwiki.
    ///
    /// # Safety
    /// If the namespace doesn't exist in the `TitleCodec` or `NamespaceMap`,
    /// some methods, like [`TitleCodec::to_pretty`], will panic.
    ///
    /// If the `dbkey` hasn't been normalized and sanitized,
    /// the comparison implementations (`Eq`, `PartialEq`, `Ord`, `PartialOrd`)
    /// for the `Title` aren't guaranteed to give the correct results.
    pub unsafe fn new_unchecked(namespace: i32, dbkey: String) -> Self {
        Self {
            namespace,
            dbkey,
            fragment: None,
            interwiki: None,
            local_interwiki: false,
        }
    }

    /// Set a fragment, replacing any fragment that was already present.
    ///
    /// This consumes the title and returns the updated one, so the return
    /// value has to be used; the fragment is stored exactly as given.
    #[must_use = "this consumes the title and returns the modified one"]
    pub fn with_fragment(mut self, fragment: String) -> Self {
        self.fragment = Some(fragment);
        self
    }

    /// Remove the fragment.
    ///
    /// This consumes the title and returns the updated one, so the return
    /// value has to be used. It is a no-op if there was no fragment.
    #[must_use = "this consumes the title and returns the modified one"]
    pub fn remove_fragment(mut self) -> Self {
        self.fragment = None;
        self
    }

    /// Get the namespace ID.
    pub fn namespace(&self) -> i32 {
        self.namespace
    }

    /// Get the dbkey.
    pub fn dbkey(&self) -> &str {
        &self.dbkey
    }

    /// Get the fragment, if there is one.
    pub fn fragment(&self) -> Option<&str> {
        self.fragment.as_deref()
    }

    /// Get the interwiki, if there is one.
    pub fn interwiki(&self) -> Option<&str> {
        self.interwiki.as_deref()
    }

    /// Whether this title was created via a local interwiki link.
    pub fn is_local_interwiki(&self) -> bool {
        self.local_interwiki
    }

    /// If the title is a local page that could exist, basically not an
    /// interwiki link, nor a fragment-only link, nor a special page.
    ///
    /// Concretely: there is no interwiki prefix, the dbkey is non-empty,
    /// and the namespace ID is not negative.
    pub fn is_local_page(&self) -> bool {
        self.interwiki.is_none()
            && !self.dbkey.is_empty()
            && self.namespace >= 0
    }

    /// Whether this title refers to a file, i.e. its namespace ID is `NS_FILE`.
    pub fn is_file(&self) -> bool {
        self.namespace == NS_FILE
    }

    /// Whether this title refers to a category, i.e. its namespace ID is `NS_CATEGORY`.
    pub fn is_category(&self) -> bool {
        self.namespace == NS_CATEGORY
    }
}