mwbot 0.7.1

A MediaWiki bot framework
Documentation
// SPDX-FileCopyrightText: 2023 Misato Kano <me@mirror-kt.dev>
// SPDX-License-Identifier: GPL-3.0-or-later
//! Generators related to wikilink and external link usage
//!
//! See the [`AllLinksInPage`], [`LinksHere`] and [`LinkSearch`] type documentation
//! for specifics.
use super::{Generator, ParamValue, SortDirection};

/// Get all links on the provided page(s).
///
/// Runs the `links` query module as a generator over the pages named in
/// `titles`, asking for the maximum batch size per request. Generator
/// parameters of this module carry the `gpl` prefix (`g` + the module's own
/// `pl` prefix).
///
/// See [API documentation](https://www.mediawiki.org/wiki/API:Links) for more details.
#[derive(Generator)]
#[params(generator = "links", gpllimit = "max")]
pub struct AllLinksInPage {
    // Optional namespace IDs, sent as `gplnamespace`.
    #[param("gplnamespace")]
    namespaces: Option<Vec<u32>>,
    // `titles` selects the source pages whose links are enumerated.
    // `gpltitles` is a different parameter: per the API documentation it
    // restricts which link *targets* are listed, so it cannot be used to
    // choose the pages to work on.
    #[param("titles")]
    titles: Vec<String>,
    // Optional listing direction, sent as `gpldir`.
    #[param("gpldir")]
    dir: Option<SortDirection>,
}

/// Get all pages that link to the given pages.
///
/// Runs the `linkshere` query module as a generator, asking for the maximum
/// batch size per request (`glhlimit = "max"`).
///
/// See [API documentation](https://www.mediawiki.org/wiki/API:Linkshere) for more details.
#[derive(Generator)]
#[params(generator = "linkshere", glhlimit = "max")]
pub struct LinksHere {
    // Pages whose incoming links are looked up; sent as `titles`.
    #[param("titles")]
    titles: Vec<String>,
    // Optional namespace IDs, sent as `glhnamespace`.
    #[param("glhnamespace")]
    namespaces: Option<Vec<u32>>,
    // Optional redirect filter, sent as `glhshow`; values come from
    // `Filter`'s `ParamValue::stringify` implementation.
    #[param("glhshow")]
    filter: Option<Filter>,
}

/// Redirect filter for [`LinksHere`], sent as the `glhshow` parameter.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Filter {
    /// Only list linking pages that are redirects (`redirect`).
    Redirect,
    /// Only list linking pages that are not redirects (`!redirect`).
    NonRedirect,
}

impl ParamValue for Filter {
    fn stringify(&self) -> String {
        match self {
            Self::Redirect => "redirect",
            Self::NonRedirect => "!redirect",
        }
        .to_string()
    }
}

/// Get all pages that link to a certain URL.
///
/// Runs the `exturlusage` query module as a generator, asking for the
/// maximum batch size per request (`geulimit = "max"`).
///
/// See [API documentation](https://www.mediawiki.org/wiki/API:Exturlusage) for more details.
#[derive(Generator)]
#[params(generator = "exturlusage", geuprop = "title", geulimit = "max")]
pub struct LinkSearch {
    // Optional URL protocol, sent as `geuprotocol`.
    #[param("geuprotocol")]
    protocol: Option<String>,
    // Optional search string, sent as `geuquery`; the test in this file
    // passes a bare host name ("www.mediawiki.org") without a protocol.
    #[param("geuquery")]
    query: Option<String>,
    // Optional namespace IDs, sent as `geunamespace`.
    #[param("geunamespace")]
    namespaces: Option<Vec<u32>>,
}

#[cfg(test)]
mod tests {
    use parsoid::WikinodeIterator;

    use super::*;
    use crate::tests::testwp;

    // NOTE: locals are called `generator`, not `gen`, because `gen` is a
    // reserved keyword starting with the Rust 2024 edition.

    /// `AllLinksInPage` over `Mwbot-rs/Linked1` must yield exactly
    /// `Mwbot-rs/Linked2`.
    #[tokio::test]
    async fn test_all_links_in_page() {
        let bot = testwp().await;
        let generator = AllLinksInPage::new(vec!["Mwbot-rs/Linked1".to_string()]);
        dbg!(generator.params());

        let mut pages = generator.generate(&bot);
        let mut found = Vec::new();

        while let Some(page) = pages.recv().await {
            let page = page.unwrap();
            found.push(page.title().to_string());
        }

        assert_eq!(found, ["Mwbot-rs/Linked2"]);
    }

    /// `LinksHere` over `Mwbot-rs/Linked2` must yield exactly
    /// `Mwbot-rs/Linked1`.
    #[tokio::test]
    async fn test_links_here() {
        let bot = testwp().await;
        let generator = LinksHere::new(vec!["Mwbot-rs/Linked2".to_string()]);
        dbg!(generator.params());

        let mut pages = generator.generate(&bot);
        let mut found = Vec::new();

        while let Some(page) = pages.recv().await {
            let page = page.unwrap();
            found.push(page.title().to_string());
        }

        assert_eq!(found, ["Mwbot-rs/Linked1"]);
    }

    /// Every page returned by `LinkSearch` for `www.mediawiki.org` must
    /// contain at least one external link whose target mentions that host.
    ///
    /// NOTE(review): if the generator yields no pages at all, the loop body
    /// never runs and this test passes without checking anything.
    #[tokio::test]
    async fn test_link_search() {
        let bot = testwp().await;
        let generator = LinkSearch::new().query("www.mediawiki.org");

        let mut pages = generator.generate(&bot);
        let mut count = 0;

        while let Some(page) = pages.recv().await {
            let page = page.unwrap();
            // Pages titled `*.js` / `*.css` are skipped; presumably they are
            // not wikitext and so have no external-link nodes to inspect.
            if page.title().ends_with(".js") || page.title().ends_with(".css") {
                continue;
            }
            dbg!(page.title());
            let html = page.html().await.unwrap().into_mutable();
            assert!(html.filter_external_links().into_iter().any(|node| {
                // Evaluate the target once and reuse it for logging and matching.
                let target = node.target();
                dbg!(&target);
                target.contains("www.mediawiki.org")
            }));

            // The counter is tested before it is incremented, so up to six
            // pages are inspected before the loop stops.
            if count >= 5 {
                break;
            }
            count += 1;
        }
    }
}