tools_interface/
sparql_rc.rs

/// # SparqlRC
/// Module for interacting with the [SparqlRC tool](https://wikidata-todo.toolforge.org/sparql_rc.php).
/// You can retrieve the recent edits made to the Wikidata entities matched by a SPARQL query,
/// within a given date range.
/// There are blocking and async methods available.
///
/// ## Example
/// ```ignore
/// let mut rc = SparqlRC::new("SELECT ?q { ?q wdt:P31 wd:Q23413 }")
///     .start(NaiveDate::from_ymd_opt(2024, 5, 1).unwrap().into())
///     .end(NaiveDate::from_ymd_opt(2024, 5, 2).unwrap().into());
/// rc.run().await.unwrap();
/// rc.results()
///     .iter()
///     .for_each(|entity_edit| {
///        println!("Entity changed: {}", entity_edit.id);
///     });
/// ```
18use crate::{Tool, ToolsError};
19use async_trait::async_trait;
20use chrono::NaiveDateTime;
21use serde_json::Value;
22
23#[derive(Debug, Default, PartialEq)]
24pub struct EntityEditor {
25    pub id: u64,
26    pub name: String,
27    pub edits: u64,
28}
29
30impl EntityEditor {
31    fn from_json(j: &Value) -> Option<Self> {
32        Some(Self {
33            id: j["user_id"].as_str().and_then(|s| s.parse().ok())?,
34            name: j["user_text"].as_str()?.to_string(),
35            edits: j["edits"].as_u64()?,
36        })
37    }
38}
39
40#[derive(Debug, Default, PartialEq)]
41pub struct EntityEdit {
42    pub id: String,
43    pub label: String,
44    pub comment: Option<String>,
45    pub msg: Option<String>,
46    pub diff_html: Option<String>,
47    pub editors: Vec<EntityEditor>,
48    pub ts_before: NaiveDateTime,
49    pub ts_after: NaiveDateTime,
50    pub changed: bool,
51    pub created: bool,
52    pub reverted: bool,
53}
54
55impl EntityEdit {
56    fn from_json(j: &Value) -> Option<Self> {
57        let ret = Self {
58            id: j["id"].as_str().map(|s| s.to_string())?,
59            label: j["label"].as_str().map(|s| s.to_string())?,
60            comment: j["comment"].as_str().map(|s| s.to_string()),
61            msg: j["msg"].as_str().map(|s| s.to_string()),
62            diff_html: j["diff"].as_str().map(|s| s.to_string()),
63            editors: Self::parse_editors(&j["editors"]),
64            ts_before: Self::parse_date(&j["ts_before"])?,
65            ts_after: Self::parse_date(&j["ts_after"])?,
66            changed: j["changed"].as_bool().unwrap_or(false),
67            created: j["created"].as_bool().unwrap_or(false),
68            reverted: j["reverted"].as_bool().unwrap_or(false),
69        };
70        Some(ret)
71    }
72
73    fn parse_date(j: &Value) -> Option<NaiveDateTime> {
74        let date = j.as_str()?;
75        NaiveDateTime::parse_from_str(date, "%Y%m%d%H%M%S").ok()
76    }
77
78    fn parse_editors(j: &Value) -> Vec<EntityEditor> {
79        j.as_array()
80            .map(|a| a.iter().filter_map(EntityEditor::from_json).collect())
81            .unwrap_or_default()
82    }
83}
84
85#[derive(Debug, Default, PartialEq)]
86pub struct SparqlRC {
87    sparql: String,
88    start: Option<NaiveDateTime>,
89    end: Option<NaiveDateTime>,
90    languages: Vec<String>,
91    no_bot_edits: bool,
92    skip_unchanged: bool,
93
94    tool_url: String,
95    results: Vec<EntityEdit>,
96}
97
98impl SparqlRC {
99    /// Create a new SparqlRC object with the given SPARQL query.
100    /// The first variable in the SPARQL select statement must be the entity ID, and named "?q".
101    pub fn new(sparql: &str) -> Self {
102        Self {
103            sparql: sparql.into(),
104            tool_url: "https://wikidata-todo.toolforge.org/sparql_rc.php".into(),
105            ..Default::default()
106        }
107    }
108
109    /// Set the start date for the query. This is mandatory.
110    pub fn start(mut self, start: NaiveDateTime) -> Self {
111        self.start = Some(start);
112        self
113    }
114
115    /// Set the end date for the query.
116    pub fn end(mut self, end: NaiveDateTime) -> Self {
117        self.end = Some(end);
118        self
119    }
120
121    fn date2string(dt: &Option<NaiveDateTime>) -> String {
122        dt.map(|d| d.format("%Y%m%d%H%M%S").to_string())
123            .unwrap_or("".to_string())
124    }
125
126    fn generate_paramters(&self) -> Result<Vec<(String, String)>, ToolsError> {
127        let parameters: Vec<(String, String)> = [
128            ("sparql".into(), self.sparql.clone()),
129            ("start".into(), Self::date2string(&self.start)),
130            ("end".into(), Self::date2string(&self.end)),
131            ("user_lang".into(), self.languages.join(",")),
132            ("no_bots".into(), (self.no_bot_edits as u8).to_string()),
133            (
134                "skip_unchanged".into(),
135                (self.skip_unchanged as u8).to_string(),
136            ),
137            ("format".into(), "json".into()),
138        ]
139        .into();
140        Ok(parameters)
141    }
142
143    fn check_start_date(&self) -> Result<(), ToolsError> {
144        match self.start {
145            Some(_) => Ok(()),
146            None => Err(ToolsError::Tool(
147                "SparqlRC start date is not set".to_string(),
148            )),
149        }
150    }
151
152    /// Get the results of the last query.
153    pub fn results(&self) -> &[EntityEdit] {
154        &self.results
155    }
156}
157
158#[async_trait]
159impl Tool for SparqlRC {
160    #[cfg(feature = "tokio")]
161    /// Run the query asynchronously.
162    async fn run(&mut self) -> Result<(), ToolsError> {
163        self.check_start_date()?;
164        let url = &self.tool_url;
165        let parameters = self.generate_paramters()?;
166        let client = crate::ToolsInterface::tokio_client()?;
167        let response = client.get(url).query(&parameters).send().await?;
168        let j: Value = response.json().await?;
169        self.set_from_json(j)
170    }
171
172    #[cfg(feature = "blocking")]
173    /// Run the query in a blocking manner.
174    fn run_blocking(&mut self) -> Result<(), ToolsError> {
175        self.check_start_date()?;
176        let url = &self.tool_url;
177        let parameters = self.generate_paramters()?;
178        let client = crate::ToolsInterface::blocking_client()?;
179        let j: Value = client.get(url).query(&parameters).send()?.json()?;
180        self.set_from_json(j)
181    }
182
183    fn set_from_json(&mut self, j: Value) -> Result<(), ToolsError> {
184        if j["status"].as_str() != Some("OK") {
185            return Err(ToolsError::Tool(format!(
186                "SparqlRC status is not OK: {:?}",
187                j["status"]
188            )));
189        }
190        self.results = j["items"]
191            .as_array()
192            .ok_or(ToolsError::Json("['items'] has no array".into()))?
193            .iter()
194            .filter_map(EntityEdit::from_json)
195            .collect();
196        Ok(())
197    }
198}
199
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;
    use std::fs::File;
    use wiremock::matchers::{method, path, query_param_contains};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    /// Starts a mock server that answers the expected SparqlRC request with
    /// the JSON fixture stored in `test_data/sparql_rc.json`.
    async fn get_mock_server() -> MockServer {
        let fixture = File::open("test_data/sparql_rc.json").expect("file not found");
        let body: Value = serde_json::from_reader(fixture).expect("error while reading file");
        let server = MockServer::start().await;

        // Every query parameter the request must carry for the mock to respond.
        let expected_query = [
            ("sparql", "SELECT ?q { ?q wdt:P31 wd:Q23413 }"),
            ("start", "20240501000000"),
            ("end", "20240502000000"),
            ("no_bots", "0"),
            ("skip_unchanged", "0"),
            ("format", "json"),
        ];
        let mut builder = Mock::given(method("GET")).and(path("/sparql_rc.php"));
        for (key, value) in expected_query {
            builder = builder.and(query_param_contains(key, value));
        }
        builder
            .respond_with(ResponseTemplate::new(200).set_body_json(body))
            .mount(&server)
            .await;
        server
    }

    /// Runs the async query against the mock server and checks the parsed items.
    #[cfg(feature = "tokio")]
    #[tokio::test]
    async fn test_sparql_rc_async() {
        let server = get_mock_server().await;
        let first_of_may = NaiveDate::from_ymd_opt(2024, 5, 1).unwrap();
        let second_of_may = NaiveDate::from_ymd_opt(2024, 5, 2).unwrap();
        let mut rc = SparqlRC::new("SELECT ?q { ?q wdt:P31 wd:Q23413 }")
            .start(first_of_may.into())
            .end(second_of_may.into());
        // Redirect the request to the mock server instead of the live tool.
        rc.tool_url = format!("{}/sparql_rc.php", server.uri());
        rc.run().await.unwrap();

        let edits = rc.results();
        assert_eq!(edits.len(), 26);
        let first = &edits[0];
        assert_eq!(first.id, "Q121134008");
        assert_eq!(first.label, "Castelluzzo");
        assert_eq!(first.editors.len(), 3);
    }
}
245
246// https://wikidata-todo.toolforge.org/sparql_rc.php?sparql=SELECT+%3Fq+{+%3Fq+wdt%3AP31+wd%3AQ23413+}&start=20240501&end=20240502&user_lang=&sort_mode=last_edit&no_bots=1&skip_unchanged=1&format=json