tools_interface/
petscan.rs

1//! # PetScan
2//! This module provides a wrapper around the PetScan tool.
3//! You can perform a PetScan query via a PSID.
4//! There are blocking and async methods available.
5//!
6//! ## Example
7//! ```rust
8//! let mut ps = PetScan::new(12345); // Your PSID
9//! ps.parameters_mut().push(("foo".to_string(), "bar".to_string())); // Override parameters from the PSID
//! ps.run().await.unwrap(); // or `ps.run_blocking().unwrap()` with the `blocking` feature
//! let page_titles = ps.pages().iter().map(|page| page.page_title.clone()).collect::<Vec<_>>();
12//! ```
13
14use std::collections::HashMap;
15
16use crate::{Tool, ToolsError};
17use async_trait::async_trait;
18use serde::{Deserialize, Serialize};
19use serde_json::Value;
20
21#[derive(Debug, Default, PartialEq, Serialize, Deserialize)]
22pub struct PetScanFileUsage {
23    pub ns: i32,
24    pub page: String,
25    pub wiki: String,
26}
27
28#[derive(Debug, Default, PartialEq, Serialize, Deserialize)]
29pub struct PetScanMetadata {
30    // TODO defaultsort (fix JSON output upstream)
31    #[serde(default)]
32    pub coordinates: String, // Coordinates "lat/lon"
33    #[serde(default)]
34    pub image: String, // Page image
35    #[serde(default)]
36    pub wikidata: String, // Wikidata item
37    #[serde(default)]
38    pub disambiguation: bool, // Is disambiguation page
39    #[serde(default)]
40    pub fileusage: String,
41    #[serde(default)]
42    pub img_height: u64,
43    #[serde(default)]
44    pub img_width: u64,
45    #[serde(default)]
46    pub img_major_mime: String,
47    #[serde(default)]
48    pub img_media_type: String,
49    #[serde(default)]
50    pub img_minor_mime: String,
51    #[serde(default)]
52    pub img_sha1: String,
53    #[serde(default)]
54    pub img_size: u64,
55    #[serde(default)]
56    pub img_timestamp: String,
57    #[serde(default)]
58    pub img_user_text: String,
59}
60
61impl PetScanMetadata {
62    pub fn coordinates(&self) -> Option<(f64, f64)> {
63        let mut parts = self.coordinates.split('/');
64        let lat = parts.next()?.parse().ok()?;
65        let lon = parts.next()?.parse().ok()?;
66        Some((lat, lon))
67    }
68}
69
70#[derive(Debug, Default, PartialEq, Deserialize)]
71pub struct PetScanPage {
72    pub page_id: u32,
73    pub page_latest: String,
74    pub page_len: u32,
75    pub page_namespace: i64,
76    pub page_title: String,
77    #[serde(default)]
78    pub giu: Vec<PetScanFileUsage>,
79    #[serde(default)]
80    pub metadata: PetScanMetadata,
81}
82
83impl Into<mediawiki::title::Title> for PetScanPage {
84    fn into(self) -> mediawiki::title::Title {
85        let title_with_spaces = mediawiki::title::Title::underscores_to_spaces(&self.page_title);
86        mediawiki::title::Title::new(&title_with_spaces, self.page_namespace)
87    }
88}
89
90#[derive(Debug, Default, PartialEq)]
91pub struct PetScan {
92    psid: u32,
93    parameters: Vec<(String, String)>,
94    pages: Vec<PetScanPage>,
95    namespaces: HashMap<i32, String>,
96    query: Option<String>,
97    wiki: Option<String>,
98    status: Option<String>,
99}
100
101impl PetScan {
102    /// Create a new PetScan query with a PSID.
103    pub fn new(psid: u32) -> Self {
104        Self {
105            psid,
106            ..Default::default()
107        }
108    }
109
110    /// Get the mutable parameters for the future PetScan query.
111    /// You can override the parameters from the PSID this way.
112    pub fn parameters_mut(&mut self) -> &mut Vec<(String, String)> {
113        &mut self.parameters
114    }
115
116    /// Get the namespaces from the PetScan query.
117    pub fn pages(&self) -> &[PetScanPage] {
118        &self.pages
119    }
120
121    /// Get the (main) wiki from the PetScan query.
122    pub fn wiki(&self) -> Option<&String> {
123        self.wiki.as_ref()
124    }
125
126    /// Get the PetScan query that was run.
127    pub fn query(&self) -> Option<&String> {
128        self.query.as_ref()
129    }
130}
131
132#[async_trait]
133impl Tool for PetScan {
134    #[cfg(feature = "blocking")]
135    /// Perform a blocking PetScan query.
136    fn run_blocking(&mut self) -> Result<(), ToolsError> {
137        let url = format!("https://petscan.wmflabs.org/?psid={psid}&format=json&output_compatability=quick-intersection", psid=self.psid);
138        let client = crate::ToolsInterface::blocking_client()?;
139        let j: Value = client.get(&url).query(&self.parameters).send()?.json()?;
140        self.from_json(j)
141    }
142
143    #[cfg(feature = "tokio")]
144    /// Get the PetScan query asynchronously.
145    async fn run(&mut self) -> Result<(), ToolsError> {
146        let url = format!("https://petscan.wmflabs.org/?psid={psid}&format=json&output_compatability=quick-intersection", psid=self.psid);
147        let client = crate::ToolsInterface::tokio_client()?;
148        let j = client
149            .get(&url)
150            .query(&self.parameters)
151            .send()
152            .await?
153            .json()
154            .await?;
155        self.from_json(j)
156    }
157
158    fn from_json(&mut self, json: Value) -> Result<(), ToolsError> {
159        self.status = json["status"].as_str().map(|s| s.to_string());
160        if self.status != Some("OK".to_string()) {
161            return Err(ToolsError::Tool(format!(
162                "PetScan status is not OK: {:?}",
163                self.status
164            )));
165        }
166        self.query = json["query"].as_str().map(|s| s.to_string());
167        self.namespaces = json["namespaces"]
168            .as_object()
169            .ok_or(ToolsError::Json("['namespaces'] has no object".into()))?
170            .iter()
171            .map(|(k, v)| (k.parse().unwrap(), v.as_str().unwrap().to_string()))
172            .collect();
173        self.wiki = json["wiki"].as_str().map(|s| s.to_string());
174        for page_json in json["pages"]
175            .as_array()
176            .ok_or(ToolsError::Json("['pages'] has no array".into()))?
177        {
178            let page: PetScanPage = serde_json::from_value(page_json.clone())?;
179            self.pages.push(page);
180        }
181        Ok(())
182    }
183}
184
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created query carries the PSID and no results.
    #[test]
    fn test_petscan_new() {
        let ps = PetScan::new(123);
        assert_eq!(ps.psid, 123);
        assert_eq!(ps.pages, vec![]);
    }

    /// Offline: a well-formed response is parsed into pages, namespaces, wiki and query.
    #[test]
    fn test_petscan_from_json_ok() {
        let json = serde_json::json!({
            "status": "OK",
            "query": "https://petscan.wmflabs.org/?psid=1",
            "wiki": "enwiki",
            "namespaces": {"0": "", "1": "Talk"},
            "pages": [{
                "page_id": 1,
                "page_latest": "20240101000000",
                "page_len": 10,
                "page_namespace": 0,
                "page_title": "Foo_bar"
            }]
        });
        let mut ps = PetScan::new(1);
        ps.from_json(json).unwrap();
        assert_eq!(ps.pages.len(), 1);
        assert_eq!(ps.pages[0].page_id, 1);
        assert_eq!(ps.pages[0].page_title, "Foo_bar");
        assert!(ps.pages[0].giu.is_empty());
        assert_eq!(ps.pages[0].metadata, PetScanMetadata::default());
        assert_eq!(ps.namespaces.get(&1), Some(&"Talk".to_string()));
        assert_eq!(ps.wiki(), Some(&"enwiki".to_string()));
        assert_eq!(
            ps.query(),
            Some(&"https://petscan.wmflabs.org/?psid=1".to_string())
        );
    }

    /// Offline: a response whose status is missing or not "OK" is rejected.
    #[test]
    fn test_petscan_from_json_bad_status() {
        let mut ps = PetScan::new(1);
        assert!(ps
            .from_json(serde_json::json!({"status": "FAILED"}))
            .is_err());
        assert!(ps.from_json(serde_json::json!({})).is_err());
        assert!(ps.pages.is_empty());
    }

    /// Offline: the raw "lat/lon" string is parsed, and malformed input yields `None`.
    #[test]
    fn test_petscan_metadata_coordinates() {
        let metadata = PetScanMetadata {
            coordinates: "49.572731/8.82455".to_string(),
            ..Default::default()
        };
        assert_eq!(metadata.coordinates(), Some((49.572731, 8.82455)));
        assert_eq!(PetScanMetadata::default().coordinates(), None);
        let only_lat = PetScanMetadata {
            coordinates: "1.0".to_string(),
            ..Default::default()
        };
        assert_eq!(only_lat.coordinates(), None);
    }

    // The tests below query the live PetScan service and need network access.

    #[cfg(feature = "blocking")]
    #[test]
    fn test_petscan_get_blocking() {
        let mut ps = PetScan::new(25951472);
        ps.run_blocking().unwrap();
        assert_eq!(ps.pages.len(), 1);
        assert_eq!(ps.pages[0].page_id, 3361346);
        assert_eq!(ps.pages[0].page_title, "Magnus_Manske");
    }

    #[cfg(feature = "tokio")]
    #[tokio::test]
    async fn test_petscan_get_async() {
        let mut ps = PetScan::new(25951472);
        ps.run().await.unwrap();
        assert_eq!(ps.pages.len(), 1);
        assert_eq!(ps.pages[0].page_id, 3361346);
        assert_eq!(ps.pages[0].page_title, "Magnus_Manske");
    }

    #[cfg(feature = "blocking")]
    #[test]
    fn test_petscan_get_blocking_file() {
        let mut ps = PetScan::new(28348161);
        ps.run_blocking().unwrap();
        let expected_giui = PetScanFileUsage {
            ns: 0,
            page: "St._Laurentius_(Wald-Michelbach)".to_string(),
            wiki: "dewiki".to_string(),
        };
        assert!(ps.pages[0].giu.iter().any(|giu| giu == &expected_giui));
        assert!(ps.pages[0].giu.len() > 2);
        assert!(!ps.pages[0].metadata.disambiguation);
        assert_eq!(ps.pages[0].metadata.img_size, 796383);
        assert_eq!(ps.pages[0].metadata.img_height, 1364);
        assert_eq!(ps.pages[0].metadata.img_width, 964);
        assert_eq!(ps.pages[0].page_id, 1166558);
        assert_eq!(
            ps.pages[0].page_title,
            "Germany_wald-michelbach_catholic_church.jpg"
        );
    }

    #[cfg(feature = "blocking")]
    #[test]
    fn test_petscan_get_blocking_metadata() {
        let mut ps = PetScan::new(28348714);
        ps.run_blocking().unwrap();
        assert_eq!(ps.pages[0].page_id, 12115738);
        assert_eq!(ps.pages[0].page_title, "St._Laurentius_(Wald-Michelbach)");
        assert_eq!(
            ps.pages[0].metadata.coordinates(),
            Some((49.572731, 8.82455))
        );
        assert_eq!(
            ps.pages[0].metadata.image,
            "Germany_wald-michelbach_catholic_church.jpg"
        );
        assert_eq!(ps.pages[0].metadata.wikidata, "Q110825193");
    }

    /// Converting a page yields the title in the page's namespace.
    #[test]
    fn test_petscan_into_title() {
        let ps = PetScanPage {
            page_namespace: 0,
            page_title: "Foo".to_string(),
            ..Default::default()
        };
        let title: mediawiki::title::Title = ps.into();
        assert_eq!(title, mediawiki::title::Title::new("Foo", 0));
    }
}