// mwtitle 0.2.0-alpha.2
//
// MediaWiki title validation and formatting
// Documentation
/*
Copyright (C) 2021 Erutuon

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
use std::fmt::Display;

/// The character used to separate words when a title is displayed.
#[derive(Clone, Copy, Debug)]
#[repr(u8)]
pub enum TitleWhitespace {
    /// Separate words with `' '`.
    Spaces,
    /// Separate words with `'_'`.
    Underscores,
}

impl TitleWhitespace {
    /// Returns the separator character that this style writes.
    pub(crate) const fn char(self) -> char {
        match self {
            Self::Spaces => ' ',
            Self::Underscores => '_',
        }
    }

    /// Returns the separator character of the opposite style, i.e. the
    /// character that has to be replaced when displaying in this style.
    pub(crate) const fn other_char(self) -> char {
        match self {
            Self::Spaces => Self::Underscores.char(),
            Self::Underscores => Self::Spaces.char(),
        }
    }
}

/// Used internally to display spaces as underscores and vice-versa.
///
/// Field `0` is the text to display and field `1` is the whitespace style to
/// display it in. When formatted, every occurrence of the other style's
/// character (`TitleWhitespace::other_char`) is written as the chosen style's
/// character (`TitleWhitespace::char`); all other characters are written
/// unchanged. The only characters ever substituted are `' '` and `'_'`.
struct WhitespaceDisplayer<'a>(pub(crate) &'a str, pub(crate) TitleWhitespace);

impl<'a> Display for WhitespaceDisplayer<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(s, whitespace) = self;
        use std::fmt::Write as _;
        // SAFETY: string indexing won't panic
        // because str::match_indices always returns valid char boundary.
        let mut last_pos = None;
        for (pos, _) in s.match_indices(whitespace.other_char()) {
            f.write_str(&s[last_pos.replace(pos + 1).unwrap_or(0)..pos])?;
            f.write_char(whitespace.char())?;
        }
        if last_pos
            // If the last whitespace character was not at the end of the string,
            // write everything after it.
            .map(|last| last < s.len())
            // If there were no whitespace characters, write the whole string.
            .unwrap_or(true)
        {
            f.write_str(&s[last_pos.unwrap_or(0)..s.len()])?;
        }
        Ok(())
    }
}

/// Checks that displaying with `TitleWhitespace::Underscores` turns every
/// space into an underscore and leaves already-underscored input unchanged.
#[test]
fn whitespace_displayer_displays_spaces_as_underscores() {
    // (input written with spaces, expected output written with underscores)
    const CASES: [(&str, &str); 11] = [
        (" ", "_"),
        ("   ", "___"),
        ("a b", "a_b"),
        (" a", "_a"),
        ("a ", "a_"),
        (" a b ", "_a_b_"),
        ("a  b", "a__b"),
        ("  ab", "__ab"),
        ("ab  ", "ab__"),
        ("  a  b", "__a__b"),
        ("  a  b  ", "__a__b__"),
    ];

    for (spaced, expected) in CASES {
        // The same title already written with underscores must pass through
        // untouched, so it is checked against the same expectation.
        let underscored = spaced.replace(' ', "_");
        for input in [spaced, underscored.as_str()] {
            let displayed =
                WhitespaceDisplayer(input, TitleWhitespace::Underscores)
                    .to_string();
            assert_eq!(displayed, expected, "\n{:?}", input);
        }
    }
}

/// Borrowed parts of a title, formatted by the `Display` implementation as
/// `interwiki:namespace:dbkey#fragment`, with absent optional parts (and
/// their `:` / `#` delimiters) omitted.
pub(crate) struct TitleDisplay<'a> {
    /// Optional interwiki prefix; written first, followed by `:`.
    pub(crate) interwiki: Option<&'a str>,
    /// Optional namespace name; written after the interwiki prefix,
    /// followed by `:`.
    pub(crate) namespace: Option<&'a str>,
    /// The dbkey part of the title; always written.
    pub(crate) dbkey: &'a str,
    /// Optional fragment; written last, preceded by `#`, with no whitespace
    /// conversion applied.
    pub(crate) fragment: Option<&'a str>,
    /// Whitespace style applied to the interwiki, namespace and dbkey parts.
    pub(crate) whitespace: TitleWhitespace,
}

impl<'a> Display for TitleDisplay<'a> {
    /// Writes the title as `interwiki:namespace:dbkey#fragment`, omitting any
    /// optional part that is `None` together with its delimiter.
    ///
    /// The interwiki, namespace and dbkey parts are written through
    /// `WhitespaceDisplayer` using `self.whitespace`; the fragment is written
    /// verbatim. Formatter flags (width, precision, fill) are ignored for
    /// every part.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Both optional prefixes are rendered the same way: converted text
        // followed by a colon.
        let prefixes = [self.interwiki, self.namespace];
        for prefix in prefixes.iter().copied().flatten() {
            WhitespaceDisplayer(prefix, self.whitespace).fmt(f)?;
            f.write_str(":")?;
        }

        WhitespaceDisplayer(self.dbkey, self.whitespace).fmt(f)?;

        if let Some(fragment) = self.fragment {
            f.write_str("#")?;
            // Use `write_str` rather than `<str as Display>::fmt`: the latter
            // goes through `Formatter::pad`, so a width or precision in the
            // format spec would pad or truncate the fragment alone while the
            // rest of the title is written unpadded.
            f.write_str(fragment)?;
        }
        Ok(())
    }
}