parsoid 0.10.1

Wrapper around Parsoid HTML that provides convenient accessors for processing and manipulation
Documentation
/*
Copyright (C) 2020-2021 Kunal Mehta <legoktm@debian.org>

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
//! Iterate through the first 500 featured articles and save each one's
//! HTML and wikitext into the `corpus/` directory.

use parsoid::{Client, Result};
use serde_json::Value;
use tokio::fs;
use urlencoding::encode;

/// User-Agent string this example identifies itself with when making HTTP requests.
const USER_AGENT: &str = "parsoid-rs testing";

/// Fetch the raw wikitext of the page `title` from English Wikipedia.
///
/// The title is percent-encoded and requested through `index.php` with
/// `action=raw`.
///
/// # Errors
///
/// Returns an error if the HTTP client cannot be built, the request fails,
/// the response carries an error status, or the body cannot be read as text.
async fn get_wikitext(title: &str) -> Result<String> {
    let http = reqwest::Client::builder().user_agent(USER_AGENT).build()?;
    let url = format!(
        "https://en.wikipedia.org/w/index.php?title={}&action=raw",
        encode(title)
    );
    // Turn a non-success status into an error before reading the body.
    let response = http.get(url).send().await?.error_for_status()?;
    let wikitext = response.text().await?;
    Ok(wikitext)
}

/// List the titles in `Category:Featured articles` on English Wikipedia.
///
/// Performs a single `list=categorymembers` API query with `cmlimit=max`;
/// no continuation is followed, so only the first batch of members is returned.
///
/// # Errors
///
/// Returns an error if the HTTP client cannot be built, the request fails,
/// the response carries an error status, or the body is not valid JSON.
///
/// # Panics
///
/// Panics if the response lacks a `query.categorymembers` array or if any
/// member has no string `title`.
async fn featured_articles() -> Result<Vec<String>> {
    let http = reqwest::Client::builder().user_agent(USER_AGENT).build()?;
    let response = http
        .get(
            "https://en.wikipedia.org/w/api.php?action=query&format=json&list=categorymembers&formatversion=2&cmtitle=Category%3AFeatured%20articles&cmlimit=max"
        )
        .send()
        .await?
        .error_for_status()?;
    let payload: Value = response.json().await?;
    let titles: Vec<String> = payload["query"]["categorymembers"]
        .as_array()
        .unwrap()
        .iter()
        .map(|entry| entry["title"].as_str().unwrap().to_string())
        .collect();
    Ok(titles)
}

#[tokio::main]
async fn main() -> Result<()> {
    let client = Client::new(
        "https://en.wikipedia.org/w/rest.php",
        "parsoid-rs testing",
    )?;
    for article in featured_articles().await? {
        let html = client.get_raw(&article).await?;
        let wikitext = get_wikitext(&article).await?;
        fs::write(format!("corpus/{}.html", encode(&article)), &html)
            .await
            .unwrap();
        fs::write(format!("corpus/{}.wiki", encode(&article)), &wikitext)
            .await
            .unwrap();
        println!("Saved {}", &article);
    }
    Ok(())
}