use crate::immutable::ImmutableWikicode;
use crate::private::Sealed;
use crate::{Error, Result, Wikicode};
use lazy_static::lazy_static;
use reqwest::header::HeaderMap;
use reqwest::{header, Client as HttpClient, Response};
use std::fmt::Write as _;
use std::sync::Arc;
use tokio::sync::Semaphore;
use tracing::debug;
use urlencoding::encode;
/// Version of this crate, read from `CARGO_PKG_VERSION` at compile time.
const VERSION: &str = env!("CARGO_PKG_VERSION");
/// `Accept` header value sent with requests: UTF-8 HTML with the
/// `Specs/HTML/2.8.0` profile.
const ACCEPT_2_8_0: &str = "text/html; charset=utf-8; profile=\"https://www.mediawiki.org/wiki/Specs/HTML/2.8.0\"";
/// Read-only view of a piece of HTML together with the metadata that
/// `Client::transform_to_wikitext` forwards to the API.
///
/// The `Sealed` supertrait comes from `crate::private`; presumably this keeps
/// the trait from being implemented outside this crate (confirm in that
/// module).
pub trait APICode: Sealed {
/// The HTML, as an owned string.
fn html(&self) -> String;
/// The page title, if one is known.
fn title(&self) -> Option<String>;
/// The ETag associated with the HTML, if any.
fn etag(&self) -> Option<&str>;
/// The revision ID associated with the HTML, if any.
fn revid(&self) -> Option<u64>;
}
/// `APICode` for `ImmutableWikicode`: every method forwards to the value's own
/// accessor or field.
impl APICode for ImmutableWikicode {
// NOTE(review): method-call syntax prefers inherent methods, so `self.html()`
// is presumably the inherent accessor on `ImmutableWikicode` (defined outside
// this view) rather than a recursive call into this trait method — confirm.
fn html(&self) -> String {
self.html().to_string()
}
// Returns a clone of the `title` field.
fn title(&self) -> Option<String> {
self.title.clone()
}
// Same remark as for `html`: presumably resolves to the inherent `etag()`.
fn etag(&self) -> Option<&str> {
self.etag()
}
// Forwards to `revision_id()`.
fn revid(&self) -> Option<u64> {
self.revision_id()
}
}
/// `APICode` for `Wikicode`: every method forwards to the value's own
/// accessor, field, or string conversion.
impl APICode for Wikicode {
// The HTML is whatever `to_string()` produces for this `Wikicode`.
fn html(&self) -> String {
self.to_string()
}
// NOTE(review): method-call syntax prefers inherent methods, so `self.title()`
// is presumably the inherent accessor on `Wikicode` (defined outside this
// view) rather than a recursive call into this trait method — confirm.
fn title(&self) -> Option<String> {
self.title()
}
// Borrows the `etag` field as `Option<&str>`.
fn etag(&self) -> Option<&str> {
self.etag.as_deref()
}
// Forwards to `revision_id()`.
fn revid(&self) -> Option<u64> {
self.revision_id()
}
}
/// HTTP client for the API rooted at `base_url`.
///
/// `Clone` is derived; because the semaphore sits behind an `Arc`, clones
/// share the same pool of request permits.
#[derive(Clone, Debug)]
pub struct Client {
/// reqwest client used to build and execute every request.
http: HttpClient,
/// Prefix that endpoint paths are appended to.
base_url: String,
/// A permit is acquired around each request execution, capping how many
/// requests run at once.
semaphore: Arc<Semaphore>,
}
impl Client {
pub fn new(base_url: &str, user_agent: &str) -> Result<Self> {
let concurrency = if base_url.contains("rest.php") {
1
} else {
100
};
let mut http = HttpClient::builder();
let ua = format!("parsoid-rs/{VERSION} {user_agent}");
#[cfg(target_arch = "wasm32")]
{
let mut headers = header::HeaderMap::new();
headers
.insert("Api-User-Agent", header::HeaderValue::from_str(&ua)?);
http = http.default_headers(headers);
}
#[cfg(not(target_arch = "wasm32"))]
{
http = http.user_agent(ua);
}
Ok(Client {
http: http.build()?,
base_url: base_url.to_string(),
semaphore: Arc::new(Semaphore::new(concurrency)),
})
}
pub fn new_with_client(base_url: &str, http: HttpClient) -> Self {
Client {
http,
base_url: base_url.to_string(),
semaphore: Arc::new(Semaphore::new(10)),
}
}
fn default_headers(&self) -> HeaderMap {
lazy_static! {
static ref HEADERMAP: HeaderMap = {
let mut headers = header::HeaderMap::new();
headers.insert(
header::ACCEPT,
ACCEPT_2_8_0
.parse()
.expect("Unable to parse Accept header"),
);
headers
};
}
(*HEADERMAP).clone()
}
async fn page_html(
&self,
page: &str,
revid: Option<u64>,
) -> Result<Response> {
let url_part = format!("{}/page/html/{}", self.base_url, encode(page));
let url = if let Some(revid) = revid {
format!("{url_part}/{revid}")
} else {
url_part
};
let url = format!("{url}?redirect=false");
let req = self
.http
.get(&url)
.headers(self.default_headers())
.build()?;
let _lock = self.semaphore.acquire().await?;
debug!(?req);
let resp = self.http.execute(req).await?;
debug!(?resp);
drop(_lock);
if resp.status() == 404 {
Err(Error::PageDoesNotExist(page.to_string()))
} else {
Ok(resp.error_for_status()?)
}
}
pub async fn get(&self, page: &str) -> Result<ImmutableWikicode> {
let resp = self.page_html(page, None).await?;
let etag = match &resp.headers().get("etag") {
Some(etag) => match etag.to_str() {
Ok(etag) => etag.to_string(),
Err(_) => return Err(Error::InvalidEtag),
},
None => return Err(Error::InvalidEtag),
};
let code = {
let mut code = Wikicode::new(&resp.text().await?);
code.etag = Some(etag);
code.title = Some(page.to_string());
code.into_immutable()
};
Ok(code)
}
pub async fn get_revision(
&self,
page: &str,
revid: u64,
) -> Result<ImmutableWikicode> {
let resp = self.page_html(page, Some(revid)).await?;
let etag = match &resp.headers().get("etag") {
Some(etag) => match etag.to_str() {
Ok(etag) => etag.to_string(),
Err(_) => return Err(Error::InvalidEtag),
},
None => return Err(Error::InvalidEtag),
};
Ok(ImmutableWikicode {
html: resp.text().await?,
title: Some(page.to_string()),
etag: Some(etag),
revid: Some(revid),
})
}
/// Fetches the HTML of `page` (no revision pinned) and returns the response
/// body as a plain string.
///
/// # Errors
///
/// Any error from the underlying page request or from reading the body.
pub async fn get_raw(&self, page: &str) -> Result<String> {
    let resp = self.page_html(page, None).await?;
    let body = resp.text().await?;
    Ok(body)
}
/// Fetches the HTML of `page` at revision `revid` and returns the response
/// body as a plain string.
///
/// # Errors
///
/// Any error from the underlying page request or from reading the body.
pub async fn get_revision_raw(&self, page: &str, revid: u64) -> Result<String> {
    let resp = self.page_html(page, Some(revid)).await?;
    let body = resp.text().await?;
    Ok(body)
}
/// Converts `wikitext` to HTML through the API and wraps the result in an
/// `ImmutableWikicode`.
///
/// # Errors
///
/// Any error returned by `transform_to_html_raw`.
pub async fn transform_to_html(
    &self,
    wikitext: &str,
) -> Result<ImmutableWikicode> {
    self.transform_to_html_raw(wikitext)
        .await
        .map(|html| ImmutableWikicode::new(&html))
}
/// POSTs `wikitext` as a form field to
/// `<base_url>/transform/wikitext/to/html` and returns the response body.
///
/// A semaphore permit is held only while the request is executed.
///
/// # Errors
///
/// Failures building or sending the request, acquiring a permit, an error
/// status from the server, or a failure reading the body.
pub async fn transform_to_html_raw(&self, wikitext: &str) -> Result<String> {
    let endpoint = format!("{}/transform/wikitext/to/html", self.base_url);
    let req = self
        .http
        .post(&endpoint)
        .headers(self.default_headers())
        .form(&[("wikitext", wikitext)])
        .build()?;
    let resp = {
        // The permit is released when this scope ends.
        let _permit = self.semaphore.acquire().await?;
        debug!(?req);
        let resp = self.http.execute(req).await?;
        debug!(?resp);
        resp
    };
    Ok(resp.error_for_status()?.text().await?)
}
/// Converts `code` back to wikitext through the API.
///
/// The HTML, title, revision ID and ETag are read from `code` and handed to
/// `transform_to_wikitext_raw`.
///
/// # Errors
///
/// Any error returned by `transform_to_wikitext_raw`.
pub async fn transform_to_wikitext<C: APICode>(
    &self,
    code: &C,
) -> Result<String> {
    // Owned values must outlive the request, so bind them first.
    let html = code.html();
    let title = code.title();
    self.transform_to_wikitext_raw(
        &html,
        title.as_deref(),
        code.revid(),
        code.etag(),
    )
    .await
}
pub async fn transform_to_wikitext_raw(
&self,
html: &str,
title: Option<&str>,
revid: Option<u64>,
etag: Option<&str>,
) -> Result<String> {
let mut url = format!("{}/transform/html/to/wikitext", self.base_url);
if let Some(title) = title {
let _ = write!(url, "/{}", encode(title));
if let Some(revid) = revid {
let _ = write!(url, "/{revid}");
}
}
let mut header_map = self.default_headers();
if let Some(etag) = etag {
header_map.insert(header::IF_MATCH, etag.parse().unwrap());
}
let req = self
.http
.post(&url)
.form(&[("html", html)])
.headers(header_map)
.build()?;
let _lock = self.semaphore.acquire().await?;
debug!(?req);
let resp = self.http.execute(req).await?;
debug!(?resp);
drop(_lock);
let wikitext = resp.error_for_status()?.text().await?;
Ok(wikitext)
}
}