1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
/*
Copyright (C) 2021 Kunal Mehta <legoktm@debian.org>

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

use crate::edit::{EditResponse, SaveOptions, Saveable};
use crate::parsoid::ImmutableWikicode;
use crate::{Bot, Error, Result, Title};
use mwapi_responses::prelude::*;
use once_cell::sync::OnceCell;
use serde_json::Value;
use std::sync::Arc;
use tokio::sync::OnceCell as AsyncOnceCell;
use tracing::info;

/// A `Page` represents a wiki page on a specific wiki (`Bot`). You can get
/// metadata about a page, its contents (in HTML or wikitext) and edit the
/// page.
///
/// Pages are obtained by calling `bot.page("<title>")?`. Each page is `Sync`
/// and designed to be easily `Clone`d so it can be sent across multiple
/// threads for concurrent processing.
///
/// Most metadata lookups are internally batched and cached so it might not
/// reflect the live state on the wiki if someone has edited or modified the
/// page in the meantime. To get fresh information create a new `Page`
/// instance.
///
/// Saving a page will respect `{{nobots}}` (if not disabled), wait as needed
/// for the configured rate limit and automatically implement edit conflict
/// detection.
#[derive(Debug, Clone)]
pub struct Page {
    /// The bot this page belongs to; supplies the API client, the Parsoid
    /// client, the configuration and the shared save timer.
    pub(crate) bot: Bot,
    /// The parsed title of the page.
    pub(crate) title: Title,
    /// Pretty text form of `title`, computed on the first call to `title()`
    /// and cached afterwards.
    pub(crate) title_text: OnceCell<String>,
    /// Page metadata, fetched from the API on first use. The cell lives
    /// behind an `Arc`, so clones of a `Page` share it.
    pub(crate) info: Arc<AsyncOnceCell<InfoResponseItem>>,
    /// Revision ID the page content/metadata was loaded at, when known. It
    /// can be set only once and is sent as `baserevid` when saving.
    pub(crate) baserevid: OnceCell<u64>,
}

// Response type for a `prop=info` API query that additionally requests the
// `associatedpage` and `url` properties. The body is intentionally empty:
// the `query` attribute is presumably what generates the `query.pages`
// field and the `InfoResponseItem` type used by `Page` -- confirm against
// the `mwapi_responses` documentation.
#[query(prop = "info", inprop = "associatedpage|url")]
pub(crate) struct InfoResponse {}

impl Page {
    /// Get the title of the page as text.
    ///
    /// The text is produced by the configured title codec the first time
    /// it is requested and cached for later calls.
    pub fn title(&self) -> &str {
        let pretty = || self.bot.config.codec.to_pretty(&self.title);
        self.title_text.get_or_init(pretty)
    }

    /// Get a reference to the underlying [`mwtitle::Title`](https://docs.rs/mwtitle/latest/mwtitle/struct.Title.html)
    pub fn as_title(&self) -> &Title {
        &self.title
    }

    /// Get the ID of the namespace this page's title is in
    pub fn namespace(&self) -> i32 {
        let title = &self.title;
        title.namespace()
    }

    /// Whether this page's title refers to a file
    pub fn is_file(&self) -> bool {
        let title = &self.title;
        title.is_file()
    }

    /// Whether this page's title refers to a category
    pub fn is_category(&self) -> bool {
        let title = &self.title;
        title.is_category()
    }

    /// Load basic page information, querying the API only if it has not
    /// been fetched (or pre-filled) before.
    ///
    /// When the response carries a latest revision ID it is also recorded
    /// as the base revision, unless one has already been set.
    async fn info(&self) -> Result<&InfoResponseItem> {
        self.info
            .get_or_try_init(|| async {
                let query = [("titles", self.title())];
                let mut resp: InfoResponse =
                    self.bot.api.query_response(query).await?;
                let item = resp
                    .query
                    .pages
                    .pop()
                    .expect("API response returned 0 pages");
                if let Some(revid) = item.lastrevid {
                    // Ignore the error: a base revision is already set
                    let _ = self.baserevid.set(revid);
                }
                Ok(item)
            })
            .await
    }

    /// Whether the page exists, i.e. the page information is not flagged
    /// as missing
    pub async fn exists(&self) -> Result<bool> {
        let info = self.info().await?;
        Ok(!info.missing)
    }

    /// Get the canonical URL for this page, as reported in the page
    /// information
    pub async fn url(&self) -> Result<&str> {
        let info = self.info().await?;
        Ok(&info.canonicalurl)
    }

    /// Whether the page information marks this page as a redirect
    pub async fn is_redirect(&self) -> Result<bool> {
        let info = self.info().await?;
        Ok(info.redirect)
    }

    /// Get the page associated with this one (subject page for a talk page
    /// or talk page for a subject page), as a new `Page` on the same bot
    pub async fn associated_page(&self) -> Result<Page> {
        let info = self.info().await?;
        self.bot.page(&info.associatedpage)
    }

    /// If this page is a redirect, get the `Page` it targets.
    ///
    /// Returns `Ok(None)` when the page is not a redirect. If the page
    /// information is already cached and says so, no request is made;
    /// otherwise the API is asked to resolve the redirect.
    ///
    /// The returned target has its page information pre-filled from the
    /// same response and, like any page whose information was loaded, has
    /// its base revision recorded when the response reports one.
    pub async fn redirect_target(&self) -> Result<Option<Page>> {
        // Optimize if we already know it's not a redirect
        if self.info.initialized() && !self.is_redirect().await? {
            return Ok(None);
        }
        // Do an API request to resolve the redirect
        let mut resp: InfoResponse = self
            .bot
            .api
            .query_response([("titles", self.title()), ("redirects", "1")])
            .await?;
        match resp.title_map().get(self.title()) {
            Some(redirect) => {
                let page = self.bot.page(redirect)?;
                let info = resp
                    .query
                    .pages
                    .pop()
                    .expect("API response returned 0 pages");
                // Keep in line with `info()`: since the cache is filled
                // here, `info()` will never run its own fetch for this
                // page, so the base revision has to be recorded now.
                if let Some(revid) = info.lastrevid {
                    let _ = page.baserevid.set(revid);
                }
                page.info
                    .set(info)
                    // unwrap: Safe because we just created the page
                    .unwrap();
                Ok(Some(page))
            }
            None => Ok(None),
        }
    }

    /// Get Parsoid HTML for the page.
    ///
    /// If a base revision is already known, exactly that revision is
    /// fetched. Otherwise the latest revision is fetched and its ID, when
    /// available, becomes the base revision.
    pub async fn html(&self) -> Result<ImmutableWikicode> {
        if let Some(&revid) = self.baserevid.get() {
            let pinned =
                self.bot.parsoid.get_revision(self.title(), revid).await?;
            return Ok(pinned);
        }
        let latest = self.bot.parsoid.get(self.title()).await?;
        // Keep track of revision id for saving in the future
        if let Some(revid) = &latest.revision_id() {
            let _ = self.baserevid.set(*revid);
        }
        Ok(latest)
    }

    /// Get Parsoid HTML for the given revision ID of this page. The page's
    /// own base revision is neither consulted nor changed.
    pub async fn revision_html(&self, revid: u64) -> Result<ImmutableWikicode> {
        let title = self.title();
        self.bot.parsoid.get_revision(title, revid).await
    }

    /// Get wikitext for self.baserevid if it's set, or the latest revision otherwise
    pub async fn wikitext(&self) -> Result<String> {
        let mut params: Vec<(&'static str, String)> = vec![
            ("action", "query".to_string()),
            ("titles", self.title().to_string()),
            ("prop", "revisions".to_string()),
            ("rvprop", "content|ids".to_string()),
            ("rvslots", "main".to_string()),
        ];
        if let Some(revid) = self.baserevid.get() {
            params.push(("rvstartid", revid.to_string()));
            params.push(("rvendid", revid.to_string()));
        }
        let resp = self.bot.api.get_value(&params).await?;
        let page = resp["query"]["pages"][0].as_object().unwrap();
        if page.contains_key("missing") {
            Err(Error::PageDoesNotExist(self.title().to_string()))
        } else {
            match page.get("revisions") {
                Some(revisions) => {
                    let revision = &revisions[0];
                    let _ =
                        self.baserevid.set(revision["revid"].as_u64().unwrap());
                    Ok(revision["slots"]["main"]["content"]
                        .as_str()
                        .unwrap()
                        .to_string())
                }
                None => {
                    // Most likely invalid title, either way revision
                    // doesn't exist
                    Err(Error::PageDoesNotExist(self.title().to_string()))
                }
            }
        }
    }

    /// Save new content for this page.
    ///
    /// `edit` is anything convertible into a [`Saveable`]: HTML is first
    /// converted to wikitext through Parsoid, wikitext is submitted as-is.
    ///
    /// Before the edit is submitted:
    /// * if `respect_nobots` is enabled in the bot's configuration, the
    ///   page's current HTML is fetched and checked for `{{nobots}}`
    /// * the base revision is sent as `baserevid` when known, so the API
    ///   can detect edit conflicts
    /// * `nocreate` or `createonly` is sent when it is already known
    ///   whether the page exists
    /// * the bot's shared save timer is awaited
    ///
    /// On success the page is handed back together with the API's edit
    /// response. If the edit created a new revision, the returned `Page`
    /// has its cached page information cleared and its base revision set to
    /// the new revision ID; if nothing changed it is returned as it was.
    ///
    /// # Errors
    ///
    /// * `Error::Nobots` if the `{{nobots}}` check does not allow the edit
    /// * `Error::UnknownSaveFailure` if the API result is not `"Success"`
    /// * any error from fetching the HTML, converting to wikitext, sending
    ///   the request or deserializing the response is passed through
    pub async fn save<S: Into<Saveable>>(
        self,
        edit: S,
        opts: &SaveOptions,
    ) -> Result<(Page, EditResponse)> {
        let mut exists: Option<bool> = None;
        if self.bot.config.respect_nobots {
            // Check {{nobots}} using existing wikicode
            match self.html().await {
                Ok(html) => {
                    exists = Some(true);
                    let username = self
                        .bot
                        .config
                        .username
                        .clone()
                        .unwrap_or_else(|| "unknown".to_string());
                    if !crate::utils::nobots(&html, &username)? {
                        return Err(Error::Nobots);
                    }
                }
                Err(Error::PageDoesNotExist(_)) => {
                    exists = Some(false);
                }
                Err(error) => {
                    return Err(error);
                }
            }
        } else if self.info.initialized() {
            // Only use what is already cached, don't make a request for it
            exists = Some(self.exists().await?);
        }

        let edit = edit.into();
        let wikitext = match edit {
            Saveable::Html(html) => {
                self.bot.parsoid.transform_to_wikitext(&html).await?
            }
            Saveable::Wikitext(wikitext) => wikitext,
        };

        let mut params: Vec<(&'static str, String)> = vec![
            ("action", "edit".to_string()),
            ("title", self.title().to_string()),
            ("text", wikitext),
            ("summary", opts.summary.to_string()),
        ];

        // Edit conflict detection
        if let Some(revid) = self.baserevid.get() {
            params.push(("baserevid", revid.to_string()));
        }
        // Even more basic edit conflict detection if we already have it
        match exists {
            Some(true) => {
                // Exists, don't create a new page
                params.push(("nocreate", "1".to_string()));
            }
            Some(false) => {
                // Missing, only create a new page
                params.push(("createonly", "1".to_string()));
            }
            None => {} // May or may not exist
        }

        // A per-save option overrides the bot-wide default
        if opts.mark_as_bot.unwrap_or(self.bot.config.mark_as_bot) {
            params.push(("bot", "1".to_string()));
        }
        if !opts.tags.is_empty() {
            params.push(("tags", opts.tags.join("|")));
        }
        // TODO: would be nice if we could output a sleep message here
        self.bot.state.save_timer.lock().await.tick().await;
        info!("Saving [[{}]]", self.title());
        let resp: Value = self.bot.api.post_with_token("csrf", &params).await?;
        match resp["edit"]["result"].as_str() {
            Some("Success") => {
                let edit_response: EditResponse =
                    serde_json::from_value(resp["edit"].clone())?;
                // Decide on `newrevid` itself instead of `nochange`: the
                // API reports `newrevid` when a revision was created and
                // leaves `nochange` out in that case, so comparing
                // `nochange` against `false` would never match. This also
                // avoids panicking if `newrevid` is unexpectedly absent.
                match resp["edit"]["newrevid"].as_u64() {
                    Some(newrevid) => {
                        let page = Page {
                            bot: self.bot,
                            title: self.title,
                            title_text: self.title_text,
                            // Cached page information is stale now
                            info: Default::default(),
                            baserevid: OnceCell::from(newrevid),
                        };
                        Ok((page, edit_response))
                    }
                    // Nothing was changed, the page is still current
                    None => Ok((self, edit_response)),
                }
            }
            // Some legacy code might return "result": "Failure" but the
            // structure is totally unspecified, so we're best off just
            // passing the entire blob into the error in the hope it
            // contains some clue.
            _ => Err(Error::UnknownSaveFailure(resp)),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::{is_authenticated, testwp};
    use crate::Error;
    use std::time::{Duration, SystemTime};

    /// `exists()` reports true for a known page, false for a missing one
    #[tokio::test]
    async fn test_exists() {
        let bot = testwp().await;
        let present = bot.page("Main Page").unwrap();
        assert!(present.exists().await.unwrap());
        let absent = bot.page("DoesNotExistPlease").unwrap();
        let absent_exists = absent.exists().await.unwrap();
        assert!(!absent_exists);
    }

    /// Titles are normalized both in text form and in database key form
    #[tokio::test]
    async fn test_title() {
        let bot = testwp().await;
        // The input deliberately ends with a space
        let page = bot.page("Main Page ").unwrap();
        let text = page.title();
        assert_eq!(text, "Main Page");
        let title = page.as_title();
        assert_eq!(title.dbkey(), "Main_Page");
    }

    /// A redirect resolves to its target; a normal page resolves to nothing
    #[tokio::test]
    async fn test_get_redirect_target() {
        let bot = testwp().await;
        // "Redirect" points to "Main Page"
        let target = bot
            .page("Mwbot-rs/Redirect")
            .unwrap()
            .redirect_target()
            .await
            .unwrap()
            .unwrap();
        assert_eq!(target.title(), "Main Page");
        // "Main Page" is not a redirect
        let next = target.redirect_target().await.unwrap();
        assert!(next.is_none());
    }

    /// HTML and wikitext of the same page agree on the expected content
    #[tokio::test]
    async fn test_get_content() {
        let bot = testwp().await;
        let page = bot.page("Main Page").unwrap();

        let doc = page.html().await.unwrap().into_mutable();
        let doc_title = doc.title().unwrap();
        assert_eq!(doc_title, "Main Page".to_string());
        let bold = doc.select_first("b").unwrap();
        assert_eq!(bold.text_contents(), "test wiki".to_string());

        let source = page.wikitext().await.unwrap();
        assert!(source.contains("'''test wiki'''"));
    }

    /// Loading the page information records the base revision
    #[tokio::test]
    async fn test_set_baserevid() {
        let bot = testwp().await;
        let page = bot.page("Main Page").unwrap();
        // Nothing has been loaded yet
        let before = page.baserevid.get().is_some();
        assert!(!before);
        page.info().await.unwrap();
        let after = page.baserevid.get().is_some();
        assert!(after);
    }

    #[tokio::test]
    async fn test_missing_page() {
        let bot = testwp().await;
        let page = bot.page("DoesNotExistPlease").unwrap();
        let err = page.html().await.unwrap_err();
        match err {
            Error::PageDoesNotExist(page) => {
                assert_eq!(&page, "DoesNotExistPlease")
            }
            err => {
                panic!("Unexpected error: {:?}", err)
            }
        }
        let err2 = page.wikitext().await.unwrap_err();
        match err2 {
            Error::PageDoesNotExist(page) => {
                assert_eq!(&page, "DoesNotExistPlease")
            }
            err => {
                panic!("Unexpected error: {:?}", err)
            }
        }
    }

    /// A plain wikitext save succeeds, retrying a few times on edit
    /// conflicts (e.g. from test runs happening at the same time)
    #[tokio::test]
    async fn test_save() {
        if !is_authenticated() {
            return;
        }

        let bot = testwp().await;
        let epoch_secs = SystemTime::now()
            .duration_since(SystemTime::UNIX_EPOCH)
            .unwrap()
            .as_secs();
        let wikitext =
            format!("It has been {} seconds since the epoch.", epoch_secs);
        let opts = SaveOptions::summary("Test suite edit");
        let mut conflicts = 0;
        loop {
            // `save` consumes the page, so get a new one per attempt
            let page = bot.page("mwbot-rs/Save").unwrap();
            let resp = page.save(wikitext.clone(), &opts).await;
            match resp {
                Ok((_, edit)) => {
                    assert_eq!(&edit.title, "Mwbot-rs/Save");
                    return;
                }
                Err(Error::EditConflict) => {
                    if conflicts > 5 {
                        panic!("hit more than 5 edit conflicts");
                    }
                    conflicts += 1;
                    tokio::time::sleep(Duration::from_secs(5)).await;
                }
                Err(ref err) => {
                    dbg!(&resp);
                    panic!("{}", err);
                }
            }
        }
    }

    /// Saving to a protected page fails with `Error::ProtectedPage`
    #[tokio::test]
    async fn test_protected() {
        if !is_authenticated() {
            return;
        }

        let bot = testwp().await;
        let page = bot.page("mwbot-rs/Protected").unwrap();
        let opts = SaveOptions::summary("Test suite edit");
        let outcome = page
            .save("Wait, I can edit this page?".to_string(), &opts)
            .await;
        let error = outcome.unwrap_err();
        dbg!(&error);
        assert!(matches!(error, Error::ProtectedPage));
    }

    #[tokio::test]
    async fn test_spamfilter() {
        if !is_authenticated() {
            return;
        }

        let bot = testwp().await;
        let page = bot.page("mwbot-rs/SpamBlacklist").unwrap();
        let wikitext = "https://bitly.com/12345".to_string();
        let error = page
            .save(wikitext, &SaveOptions::summary("Test suite edit"))
            .await
            .unwrap_err();
        if let Error::SpamFilter { matches, .. } = error {
            assert_eq!(matches, vec!["bitly.com".to_string()])
        } else {
            panic!("{:?} doesn't match", error)
        }
    }

    #[tokio::test]
    async fn test_partialblock() {
        if !is_authenticated() {
            return;
        }
        let bot = testwp().await;
        let page = bot.page("Mwbot-rs/Partially blocked").unwrap();
        let error = page
            .save(
                "I shouldn't be able to edit this".to_string(),
                &SaveOptions::summary("Test suite edit"),
            )
            .await
            .unwrap_err();
        if let Error::PartiallyBlocked { info, .. } = error {
            assert_eq!(info, "You have been blocked from editing this page.");
        } else {
            panic!("{:?} doesn't match", error);
        }
    }

    /// Regression test to verify we don't panic on invalid titles
    /// https://gitlab.com/mwbot-rs/mwbot/-/issues/33
    ///
    /// Mostly moot now that we have proper title validation
    #[tokio::test]
    async fn test_invalidtitle() {
        let bot = testwp().await;
        // Should return an error
        let err = bot.page("<invalid title>").unwrap_err();
        assert!(matches!(err, Error::InvalidTitle(_)));
        let err = bot.page("Special:BlankPage").unwrap_err();
        assert!(matches!(err, Error::InvalidPage));
    }

    #[tokio::test]
    async fn test_editconflict() {
        if !is_authenticated() {
            return;
        }
        let bot = testwp().await;
        let page = bot.page("mwbot-rs/Edit conflict").unwrap();
        // Fake a older baserevid in
        page.baserevid.set(498547).unwrap();
        let err = page
            .save(
                "This should fail",
                &SaveOptions::summary("this should fail"),
            )
            .await
            .unwrap_err();
        match err {
            Error::EditConflict => assert!(true),
            err => panic!("{:?} is not Error::EditConflict", err),
        }
    }

    /// The associated page of a subject page is its talk page
    #[tokio::test]
    async fn test_associated_page() {
        let bot = testwp().await;
        let subject = bot.page("Main Page").unwrap();
        let talk = subject.associated_page().await.unwrap();
        assert_eq!(talk.title(), "Talk:Main Page");
    }

    /// Saving to a page carrying `{{nobots}}` fails with `Error::Nobots`
    #[tokio::test]
    async fn test_nobots() {
        if !is_authenticated() {
            return;
        }
        let bot = testwp().await;
        let page = bot.page("Mwbot-rs/Nobots").unwrap();
        let text =
            "This edit should not go through due to the {{nobots}} template"
                .to_string();
        let opts = SaveOptions::summary("Test suite edit");
        let outcome = page.save(text, &opts).await;
        let error = outcome.unwrap_err();
        assert!(matches!(error, Error::Nobots));
    }
}