1use chrono::{Duration, NaiveDate};
37
/// Fallback delay, in seconds, before retrying a request that was answered
/// with HTTP 429 when the response carries no parsable `Retry-After` header.
const DEFAULT_RETRY_DELAY_SECS: u64 = 5;
40use futures::prelude::*;
41use serde::Deserialize;
42use serde_json::Value;
43
44#[derive(Clone, Debug, PartialEq, Deserialize)]
45pub enum PageviewsAccess {
46 #[serde(rename = "all-access")]
47 All,
48 #[serde(rename = "desktop")]
49 Desktop,
50 #[serde(rename = "mobile-app")]
51 MobileApp,
52 #[serde(rename = "mobile-web")]
53 MobileWeb,
54}
55
56impl PageviewsAccess {
57 pub fn as_str(&self) -> &str {
58 match self {
59 Self::All => "all-access",
60 Self::Desktop => "desktop",
61 Self::MobileApp => "mobile-app",
62 Self::MobileWeb => "mobile-web",
63 }
64 }
65}
66
67#[derive(Clone, Debug, PartialEq, Deserialize)]
68pub enum PageviewsAgent {
69 #[serde(rename = "all-agents")]
70 All,
71 #[serde(rename = "user")]
72 User,
73 #[serde(rename = "spider")]
74 Spider,
75 #[serde(rename = "automated")]
76 Automated,
77}
78
79impl PageviewsAgent {
80 pub fn as_str(&self) -> &str {
81 match self {
82 Self::All => "all-agents",
83 Self::User => "user",
84 Self::Spider => "spider",
85 Self::Automated => "automated",
86 }
87 }
88}
89
90#[derive(Clone, Debug, PartialEq, Deserialize)]
91pub enum PageviewsGranularity {
92 #[serde(rename = "hourly")]
93 Hourly,
94 #[serde(rename = "daily")]
95 Daily,
96 #[serde(rename = "monthly")]
97 Monthly,
98}
99
100impl PageviewsGranularity {
101 pub fn as_str(&self) -> &str {
102 match self {
103 Self::Hourly => "hourly",
104 Self::Daily => "daily",
105 Self::Monthly => "monthly",
106 }
107 }
108}
109
110#[derive(Clone, Debug, PartialEq, Deserialize)]
111pub struct PageviewsTimestamp {
112 year: u16,
113 month: u8,
114 day: u8,
115 hour: u8,
116}
117
118impl From<&str> for PageviewsTimestamp {
119 fn from(item: &str) -> Self {
120 Self {
121 year: item[0..4].parse().unwrap(),
122 month: item[4..6].parse().unwrap(),
123 day: item[6..8].parse().unwrap(),
124 hour: item[8..10].parse().unwrap(),
125 }
126 }
127}
128
129impl From<PageviewsTimestamp> for String {
130 fn from(val: PageviewsTimestamp) -> Self {
131 format!(
132 "{:04}{:02}{:02}{:02}",
133 val.year, val.month, val.day, val.hour
134 )
135 }
136}
137
/// One entry of a pageviews response: a timestamp and the view count for it.
#[derive(Clone, Debug, PartialEq)]
pub struct PageviewsParams {
    /// Parsed from the entry's `timestamp` string.
    pub timestamp: PageviewsTimestamp,
    /// Value of the entry's `views` field.
    pub views: u64,
}
143
144impl PageviewsParams {
145 fn from_json(item: &Value) -> Option<Self> {
146 let ts = item.get("timestamp")?.as_str()?;
147 Some(Self {
148 timestamp: ts.into(),
149 views: item.get("views")?.as_u64()?,
150 })
151 }
152}
153
/// Outcome of one per-article request: the parameters it was made with plus
/// the entries parsed from the response.
#[derive(Clone, Debug, PartialEq)]
pub struct PageviewsResult {
    /// Project the request was made for, as passed by the caller.
    pub project: String,
    /// Page title used in the request, with spaces replaced by underscores.
    pub article: String,
    /// Granularity the request was made with.
    pub granularity: PageviewsGranularity,
    /// Access type the request was made with.
    pub access: PageviewsAccess,
    /// Agent type the request was made with.
    pub agent: PageviewsAgent,
    /// Entries parsed from the response's `items` array; items that cannot
    /// be parsed are left out.
    pub entries: Vec<PageviewsParams>,
}
163
164impl PageviewsResult {
165 pub fn total_views(&self) -> u64 {
166 self.entries.iter().map(|r| r.views).sum::<u64>()
167 }
168
169 pub fn len(&self) -> usize {
170 self.entries.len()
171 }
172
173 pub fn is_empty(&self) -> bool {
174 self.len() == 0
175 }
176}
177
/// Request settings applied to every query made through this value.
#[derive(Debug, PartialEq)]
pub struct Pageviews {
    /// Time resolution placed in the request URL.
    granularity: PageviewsGranularity,
    /// Access type placed in the request URL.
    access: PageviewsAccess,
    /// Agent type placed in the request URL.
    agent: PageviewsAgent,
}
184
185impl Pageviews {
186 pub fn month_start(year: i32, month: u32) -> Option<NaiveDate> {
188 NaiveDate::from_ymd_opt(year, month, 1)
189 }
190
191 pub fn month_end(year: i32, month: u32) -> Option<NaiveDate> {
193 let mut last_day_of_month = NaiveDate::from_ymd_opt(year, month + 1, 1)
194 .or(NaiveDate::from_ymd_opt(year + 1, 1, 1))?;
195 last_day_of_month -= Duration::days(1);
196 Some(last_day_of_month)
197 }
198
199 pub fn new(
201 granularity: PageviewsGranularity,
202 access: PageviewsAccess,
203 agent: PageviewsAgent,
204 ) -> Self {
205 Self {
206 granularity,
207 access,
208 agent,
209 }
210 }
211
212 #[cfg(feature = "tokio")]
213 pub async fn get_per_article<S1: Into<String>, S2: Into<String>>(
217 &self,
218 page: S1,
219 project: S2,
220 start: &NaiveDate,
221 end: &NaiveDate,
222 ) -> Result<PageviewsResult, crate::ToolsError> {
223 let project: String = project.into();
224 let page: String = page.into().replace(" ", "_");
225 let url = format!(
226 "https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/{project}/{access}/{agent}/{page}/{granularity}/{start}/{end}",
227 access = self.access.as_str(),
228 agent = self.agent.as_str(),
229 granularity = self.granularity.as_str(),
230 start = start.format("%Y%m%d"),
231 end = end.format("%Y%m%d"),
232 );
233 let client = crate::ToolsInterface::tokio_client()?;
234 let json: Value;
235 loop {
236 let response = client.get(&url).send().await?;
237 let status = response.status();
238 if status == 429 {
239 let delay = response
241 .headers()
242 .get("Retry-After")
243 .and_then(|s| s.to_str().ok())
244 .and_then(|s| s.parse().ok())
245 .unwrap_or(DEFAULT_RETRY_DELAY_SECS);
246 tokio::time::sleep(tokio::time::Duration::from_secs(delay)).await;
247 continue;
248 }
249 json = response.json().await?;
250 break;
251 }
252 if json.get("status").is_some() {
253 let message = match json.get("detail") {
254 Some(detail) => match detail.as_str() {
255 Some(detail_str) => detail_str.to_string(),
256 None => detail.to_string(), },
258 None => json["status"].to_string(), };
260 return Err(crate::ToolsError::Tool(message));
261 }
262 let items = json
263 .get("items")
264 .ok_or_else(|| crate::ToolsError::Json("No 'items' in Pageviews JSON".to_string()))?
265 .as_array()
266 .ok_or_else(|| {
267 crate::ToolsError::Json("'items' is not an array in Pageviews JSON".to_string())
268 })?;
269 let ret = PageviewsResult {
270 project,
271 article: page,
272 granularity: self.granularity.to_owned(),
273 access: self.access.to_owned(),
274 agent: self.agent.to_owned(),
275 entries: items
276 .iter()
277 .filter_map(PageviewsParams::from_json)
278 .collect(),
279 };
280 Ok(ret)
281 }
282
283 #[cfg(feature = "tokio")]
284 pub async fn get_multiple_articles(
289 &self,
290 project_pages: &Vec<(String, String)>,
291 start: &NaiveDate,
292 end: &NaiveDate,
293 max_concurrent: usize,
294 ) -> Result<Vec<PageviewsResult>, crate::ToolsError> {
295 let mut futures = Vec::new();
296 for (project, page) in project_pages {
297 let fut = self.get_per_article(page, project, start, end);
298 futures.push(fut);
299 }
300 let stream = futures::stream::iter(futures).buffer_unordered(max_concurrent);
301 let results = stream.collect::<Vec<_>>().await;
302 Ok(results.into_iter().filter_map(|r| r.ok()).collect())
303 }
304
305 }
309
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a query for all access types and all agents at `granularity`.
    #[cfg(feature = "tokio")]
    fn all_traffic(granularity: PageviewsGranularity) -> Pageviews {
        Pageviews::new(granularity, PageviewsAccess::All, PageviewsAgent::All)
    }

    #[test]
    fn test_last_of_month() {
        // (year, month, expected last day)
        let cases = [
            (2021, 1, "2021-01-31"),
            (2021, 2, "2021-02-28"),
            (2024, 2, "2024-02-29"),
            (2021, 3, "2021-03-31"),
            (2021, 4, "2021-04-30"),
            (2021, 5, "2021-05-31"),
            (2021, 6, "2021-06-30"),
            (2021, 7, "2021-07-31"),
            (2021, 8, "2021-08-31"),
            (2021, 9, "2021-09-30"),
            (2021, 10, "2021-10-31"),
            (2021, 11, "2021-11-30"),
            (2021, 12, "2021-12-31"),
        ];
        for (year, month, expected) in cases {
            let last_day = Pageviews::month_end(year, month).unwrap();
            assert_eq!(last_day.format("%Y-%m-%d").to_string(), expected);
        }
    }

    // The async tests below query the live API.

    #[cfg(feature = "tokio")]
    #[tokio::test]
    async fn test_pageviews_get_per_article_monthly_async() {
        let first = Pageviews::month_start(2016, 1).unwrap();
        let last = Pageviews::month_end(2016, 12).unwrap();
        let result = all_traffic(PageviewsGranularity::Monthly)
            .get_per_article("Barack_Obama", "de.wikipedia", &first, &last)
            .await
            .unwrap();
        assert_eq!(result.len(), 12);
        assert_eq!(result.total_views(), 1_550_502);
    }

    #[cfg(feature = "tokio")]
    #[tokio::test]
    async fn test_pageviews_get_per_article_daily_async() {
        let first = Pageviews::month_start(2016, 1).unwrap();
        let last = Pageviews::month_end(2016, 1).unwrap();
        let result = all_traffic(PageviewsGranularity::Daily)
            .get_per_article("Barack_Obama", "de.wikipedia", &first, &last)
            .await
            .unwrap();
        assert_eq!(result.len(), 31);
        assert_eq!(result.total_views(), 112_458);
    }

    #[cfg(feature = "tokio")]
    #[tokio::test]
    async fn test_pageviews_get_per_article_bad_date_async() {
        // The year 1016 is used as a date range the API is expected to reject.
        let first = Pageviews::month_start(1016, 1).unwrap();
        let last = Pageviews::month_end(1016, 1).unwrap();
        let result = all_traffic(PageviewsGranularity::Daily)
            .get_per_article("Barack_Obama", "de.wikipedia", &first, &last)
            .await;
        assert!(result.is_err());
    }

    #[cfg(feature = "tokio")]
    #[tokio::test]
    async fn test_pageviews_multiple_articles_async() {
        let project_pages: Vec<(String, String)> = [
            ("de.wikipedia", "Barack Obama"),
            ("de.wikipedia", "Trude Herr"),
        ]
        .into_iter()
        .map(|(project, page)| (project.into(), page.into()))
        .collect();
        let first = Pageviews::month_start(2016, 1).unwrap();
        let last = Pageviews::month_end(2016, 12).unwrap();
        let results = all_traffic(PageviewsGranularity::Monthly)
            .get_multiple_articles(&project_pages, &first, &last, 5)
            .await
            .unwrap();
        assert_eq!(results.len(), 2);
        let overall_views: u64 = results.iter().map(PageviewsResult::total_views).sum();
        assert_eq!(overall_views, 1_670_723);
    }

    #[test]
    fn test_pageviews_timestamp() {
        // Parsing and re-rendering must reproduce the input exactly.
        let time_string = "2345123159";
        let parsed = PageviewsTimestamp::from(time_string);
        let rendered = String::from(parsed);
        assert_eq!(rendered, time_string);
    }
}