mwbot 0.7.1

A MediaWiki bot framework
Documentation
/*
Copyright (C) 2021 Kunal Mehta <legoktm@debian.org>

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
//! ## Page generators
//!
//! Page generators are used to get lists of pages to operate on.
//! For example, you could get a list of pages in a category or pages
//! that use a specific template.
//!
//! Most generators return a [`Page`], but others may return
//! other types; for example, Revisions returns revision metadata.
//!
//! ## Basic usage
//!
//! All generators follow the same basic structure:
//! * a `new()` constructor that takes mandatory parameters
//! * optional functions to supply optional parameters
//! * a `generate()` function to start fetching pages
//!
//! Rust has mediocre support for asynchronous iterators, so
//! we take the approach of spawning a background task to fetch
//! pages and use a channel to pass the results back. (This is
//! why generators don't work in WebAssembly.)
//!
//! ## Example
//!
//! To fetch a list of pages in the category
//! [Free software programmed in Rust](https://en.wikipedia.org/wiki/Category:Free_software_programmed_in_Rust):
//! ```
//! # use mwbot::{Bot, Result};
//! use mwbot::generators::{categories::CategoryMembers, Generator};
//!
//! # async fn wrapper() -> Result<()> {
//! let bot = Bot::from_default_config().await.unwrap();
//! let mut generator = CategoryMembers::new("Category:Free software programmed in Rust");
//! let mut pages = generator.generate(&bot);
//! while let Some(page) = pages.recv().await {
//!     let page = page?;
//!     println!("{}", page.title());
//! }
//! # Ok(())
//! # }
//! ```
//!
pub mod allpages;
pub mod allredirects;
pub mod categories;
pub mod file;
pub mod langlinks;
pub mod link;
// Only compiled with the `linter` feature; under `--cfg docsrs` the
// `cfg_attr` line also records that requirement in the generated docs
#[cfg(feature = "linter")]
#[cfg_attr(docsrs, doc(cfg(feature = "linter")))]
pub mod lint;
pub mod log_events;
pub mod lonely;
pub mod pageswithprop;
pub mod querypage;
pub mod random;
pub mod recent_changes;
pub mod revisions;
pub mod search;
pub mod templates;
pub mod transcluded;
pub mod uncategorized;
// Only compiled with the `wikibase` feature (same docs.rs handling as
// the `linter` feature above)
#[cfg(feature = "wikibase")]
#[cfg_attr(docsrs, doc(cfg(feature = "wikibase")))]
pub mod unconnected_pages;
pub mod unused;
pub mod unwatched;
pub mod user_contribs;
// Private module; its `ParamValue` trait is re-exported from this module
mod value;

// Public only so that the #[derive(Generator)] macro can reach it;
// hidden from the documentation
#[doc(hidden)]
pub mod __exports;

use crate::page::InfoResponseItem;
use crate::{Bot, Page, Result};
use categories::CategoryMembers;
pub use mwbot_derive::Generator;
use std::collections::HashMap;
use tokio::sync::mpsc::{self, Receiver};
pub use value::ParamValue;

/// Holds the query parameters of an API request, with the continuation
/// parameters stored apart from the main ones
#[doc(hidden)]
#[derive(Debug, Default)]
pub struct Params {
    /// Parameters of the request itself
    pub main: HashMap<String, String>,
    /// Continuation parameters, kept in their own map
    pub continue_: HashMap<String, String>,
}

impl Params {
    /// Combine the main and continuation parameters into a single map of
    /// borrowed keys and values.
    ///
    /// Continuation entries are added after the main ones, so when a key
    /// is present in both maps the continuation value is the one returned.
    pub fn merged(&self) -> HashMap<&String, &String> {
        self.main.iter().chain(&self.continue_).collect()
    }
}

/// Recursively get pages that are in the given category.
///
/// Pages may be returned multiple times if they're present in
/// multiple categories.
///
/// To prevent infinite loops, the generator keeps track of categories seen
/// and will not recurse through any of them more than once.
///
/// The traversal runs in a task started with `tokio::spawn`, so this must
/// be called from within a Tokio runtime. Every member, subcategory pages
/// included, is sent through a channel with a buffer of 50 items. Errors
/// reported by the underlying [`CategoryMembers`] generator are forwarded
/// as-is and do not stop the traversal. Once the returned receiver is
/// dropped, the task stops at its next attempt to send.
pub fn categorymembers_recursive(
    bot: &Bot,
    title: &str,
) -> Receiver<Result<Page>> {
    let (tx, rx) = mpsc::channel(50);
    let title = title.to_string();
    let bot = bot.clone();
    tokio::spawn(async move {
        // Every category that has been queued so far, whether it is still
        // pending or was already processed. A single hash set replaces
        // linear scans over separate "seen" and "pending" lists.
        let mut queued = std::collections::HashSet::new();
        queued.insert(title.clone());
        // Categories that still have to be fetched (processed last-in,
        // first-out)
        let mut pending = vec![title];
        while let Some(category) = pending.pop() {
            // NOTE(review): presumably something on this line is marked
            // deprecated upstream; confirm the lint is still needed
            #[allow(deprecated)]
            let mut members = CategoryMembers::new(&category).generate(&bot);
            while let Some(page) = members.recv().await {
                if let Ok(page) = &page {
                    // Only allocate an owned title for subcategories
                    // that have not been queued before
                    if page.is_category() && !queued.contains(page.title())
                    {
                        let subcategory = page.title().to_string();
                        queued.insert(subcategory.clone());
                        pending.push(subcategory);
                    }
                }
                if tx.send(page).await.is_err() {
                    // Receiver hung up, just abort
                    return;
                }
            }
        }
    });
    rx
}

/// Get pages that transclude the given template
///
/// See [API documentation](https://www.mediawiki.org/wiki/API:Embeddedin) for more details.
///
/// The title is mandatory; the namespace, redirect filter and direction
/// are optional.
// The `#[params(...)]` attribute lists the fixed API parameters
// (`generator=embeddedin`, `geilimit=max`); each `#[param("...")]` names
// the API parameter its field is sent as.
#[derive(Generator)]
#[params(generator = "embeddedin", geilimit = "max")]
pub struct EmbeddedIn {
    /// Title of the template
    // Sent as `geititle`
    #[param("geititle")]
    title: String,
    /// Get results from pages in these namespaces
    // Sent as `geinamespace`, only when set
    #[param("geinamespace")]
    namespace: Option<Vec<u32>>,
    /// Whether and how to filter redirects
    // Sent as `geifilterredir`, only when set
    #[param("geifilterredir")]
    filter_redirect: Option<FilterRedirect>,
    /// Direction to get results in
    // Sent as `geidir`, only when set
    #[param("geidir")]
    dir: Option<SortDirection>,
}

/// How redirects are treated when listing pages
///
/// Serialized through [`ParamValue`] as `all`, `nonredirects` or
/// `redirects`.
#[derive(Copy, Clone, Debug, Default)]
pub enum FilterRedirect {
    /// Do not filter on redirect status (the default)
    #[default]
    All,
    /// Only pages that are not redirects
    Nonredirects,
    /// Only pages that are redirects
    Redirects,
}

impl ParamValue for FilterRedirect {
    /// Returns the API keyword for this variant
    fn stringify(&self) -> String {
        match self {
            Self::All => "all",
            Self::Nonredirects => "nonredirects",
            Self::Redirects => "redirects",
        }
        .to_string()
    }
}

/// Order in which results are listed
///
/// Serialized through [`ParamValue`] as `ascending` or `descending`.
#[derive(Debug, Default, Clone, Copy)]
pub enum SortDirection {
    /// Ascending order (the default)
    #[default]
    Ascending,
    /// Descending order
    Descending,
}

impl ParamValue for SortDirection {
    /// Returns the API keyword for this direction
    fn stringify(&self) -> String {
        let keyword = match *self {
            SortDirection::Ascending => "ascending",
            SortDirection::Descending => "descending",
        };
        String::from(keyword)
    }
}

/// Derivable trait that implements a straightforward builder to construct
/// API parameters to generate a list of pages. Using the MediaWiki API's
/// generator feature, it preloads basic metadata about pages to speed up
/// initial processing.
///
/// Generators that are not included in the mwbot library can be implemented by users by deriving this trait.
/// The `#[generator(...)]` attribute holds the basic settings of the generator.
///
/// ## Parameters for `#[generator(...)]`
/// ### `crate`
/// | Type                                                   | Default                                       |
/// | ------------------------------------------------------ | --------------------------------------------- |
/// | [path](https://doc.rust-lang.org/reference/paths.html) | `"crate"` (for internal use in `mwbot` crate) |
///
/// `mwbot` crate location. In most cases, specify `"mwbot"`, but if you are using a `mwbot` that has been renamed in Cargo.toml or re-exported, specify its path.
///
/// ### `return_type`
/// | Type                                                   | Default          |
/// | ------------------------------------------------------ | ---------------- |
/// | [type](https://doc.rust-lang.org/reference/types.html) | [`"Page"`](Page) |
///
/// The type of the Item returned from this Generator.
///
/// ### `response_type`
/// | Type                                                   | Default                     |
/// | ------------------------------------------------------ | --------------------------- |
/// | [type](https://doc.rust-lang.org/reference/types.html) | `"mwbot::page::InfoResponse"` |
///
/// Type representing the MediaWiki API response. [`mwapi_responses::ApiResponse`] must be implemented.
///
/// ### `transform_fn`
/// | Type                                                   | Default                                  |
/// | ------------------------------------------------------ | ---------------------------------------- |
/// | [path](https://doc.rust-lang.org/reference/paths.html) | `"mwbot::generators::transform_to_page"` |
///
/// Function that transforms a response item into the return type.
/// It must satisfy `fn(bot: &mwbot::Bot, item: T) -> mwbot::Result<R>`, where `T` is the `ApiResponse`'s item type and `R` is `return_type`.
///
/// ## Examples
/// ```rust
/// use mwapi_responses::query;
/// use mwbot::{Bot, Result};
/// use mwbot::generators::Generator;
///
/// #[query(prop = "info|categories", inprop = "associatedpage|url")]
/// struct MyPageInfoResponse;
///
/// #[derive(Generator)]
/// #[generator(
///     crate = "mwbot",
///     return_type = "MyPageInfoResponseItem",
///     response_type = "MyPageInfoResponse",
///     transform_fn = "noop_transform"
/// )]
/// #[params(generator = "allpages", gaplimit = "max")]
/// struct MyAllPageGenerator {}
///
/// fn noop_transform(
///     _bot: &Bot,
///     item: MyPageInfoResponseItem
/// ) -> Result<MyPageInfoResponseItem> {
///     Ok(item)
/// }
/// ```
pub trait Generator: Sized {
    /// Type of each message delivered through the receiver returned by
    /// [`generate()`](Generator::generate)
    type Output;

    /// Map of API parameters, keyed by parameter name
    fn params(&self) -> HashMap<&'static str, String>;

    /// Consume the generator and start it, returning a receiver through
    /// which the results are delivered asynchronously
    fn generate(self, bot: &Bot) -> Receiver<Self::Output>;
}

// for #[derive(Generator)] macro
/// Create the [`Page`] named by `item.title` and store `item` as its
/// preloaded info. Fails if the page cannot be created from the title.
#[doc(hidden)]
pub fn transform_to_page(bot: &Bot, item: InfoResponseItem) -> Result<Page> {
    let page = bot.page(&item.title)?;
    // unwrap: the page was created on the line above, so it's impossible
    // for another thread to be trying to set its metadata
    page.info.set(item).unwrap();
    Ok(page)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::testwp;

    /// Walks `Category:Mwbot-rs` recursively and checks that the
    /// "Mwbot-rs/Categorized depth 1" page is among the results.
    #[tokio::test]
    async fn test_categorymembers_recursive() {
        let bot = testwp().await;
        let mut members = categorymembers_recursive(&bot, "Category:Mwbot-rs");
        let mut found = false;
        while let Some(page) = members.recv().await {
            if page.unwrap().title() == "Mwbot-rs/Categorized depth 1" {
                found = true;
            }
        }
        // We are also testing that this generator runs to completion and does
        // not get trapped in an infinite loop
        //
        // The message is printed only when the assertion fails, so it has
        // to describe the failure
        assert!(found, "depth 1 page was not found");
    }

    /// Checks that the first five results for pages embedding
    /// `Template:1x` arrive without an error.
    #[tokio::test]
    async fn test_embeddedin() {
        let bot = testwp().await;
        let mut embeddedin =
            EmbeddedIn::new("Template:1x".to_string()).generate(&bot);
        let mut count = 0;
        while let Some(page) = embeddedin.recv().await {
            page.unwrap();
            count += 1;
            // Five results are enough; no need to drain the generator
            if count == 5 {
                break;
            }
        }
        assert_eq!(count, 5);
    }
}