use crate::form::Form;
use reqwest::{header::HeaderMap, Method, StatusCode, Url};
use scraper::{ElementRef, Html, Selector};
use thiserror::Error;
/// Errors produced by the lookup and query methods of [`Page`].
#[derive(Debug, Error)]
pub enum Error {
/// Returned by [`Page::query`] when no query pair of the page URL has the requested name.
#[error("Query param '{param}' is not defined in query string '{query}'!")]
UnknownQueryParamError {
/// The query string of the page URL (empty if the URL has no query string).
query: String,
/// The parameter name that was looked up.
param: String,
},
/// Returned by [`Page::select`] and [`Page::select_first`] when the given string cannot
/// be parsed as a CSS selector.
#[error("Failed to parse CSS selector '{selector}', reason: {reason}")]
CssSelectorParseError {
/// The selector string exactly as passed by the caller.
selector: String,
/// `Debug` rendering of the parser error reported by `scraper`.
reason: String,
},
/// Returned by [`Page::select_first`] when a valid selector matches nothing in the document.
#[error("CSS selector '{selector}' matched no elements.")]
CssSelectorResultEmptyError {
/// The selector string exactly as passed by the caller.
selector: String,
},
/// Returned by [`Page::form`] and [`Page::form_mut`] when the index is not below the
/// number of forms on the page.
#[error("This page contains {num_forms} forms; index {idx} is out of bounds!")]
FormIndexOutOfBoundsError {
/// Number of forms parsed from the page.
num_forms: usize,
/// The index that was requested.
idx: usize,
},
/// Returned by [`Page::form_by_id`] and [`Page::form_by_id_mut`] when no form has the
/// requested id.
#[error("This page contains no form with id '{id}'!")]
FormIdNotFoundError {
/// The id that was looked up.
id: String,
},
}
/// Shorthand for a [`std::result::Result`] whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
/// A page assembled by `Page::build`: the metadata handed to `build`, the raw body text,
/// and the HTML document and forms parsed from that text.
#[derive(Debug)]
pub struct Page {
/// Method passed to `build`; presumably the HTTP method of the request that produced
/// this page (confirm against the caller).
method: Method,
/// Status code passed to `build`; presumably the HTTP response status (confirm against
/// the caller).
status: StatusCode,
/// Header map passed to `build`; presumably the response headers (confirm against the
/// caller).
headers: HeaderMap,
/// URL of the page; also handed (cloned) to every parsed form and used by `query`.
url: Url,
/// The body text exactly as passed to `build`.
text: String,
/// `text` parsed as an HTML document; backs `select` and `select_first`.
html: Html,
/// One entry per `form` element found in `html`, in the order the selector yields them.
forms: Vec<Form>,
}
impl Page {
pub(crate) fn build(
method: Method,
url: Url,
status: StatusCode,
headers: HeaderMap,
text: String,
) -> Self {
let html = Html::parse_document(&text);
let forms = Self::parse_forms(&html, &url);
Self {
method,
status,
headers,
url,
text,
html,
forms,
}
}
pub const fn method(&self) -> &Method {
&self.method
}
pub const fn status(&self) -> &StatusCode {
&self.status
}
pub const fn headers(&self) -> &HeaderMap {
&self.headers
}
pub const fn url(&self) -> &Url {
&self.url
}
pub fn text(&self) -> &str {
&self.text
}
pub fn form(&self, idx: usize) -> Result<&Form> {
self.forms.get(idx).ok_or(Error::FormIndexOutOfBoundsError {
num_forms: self.forms.len(),
idx,
})
}
pub fn form_mut(&mut self, idx: usize) -> Result<&mut Form> {
let len = self.forms.len();
self.forms
.get_mut(idx)
.ok_or(Error::FormIndexOutOfBoundsError {
num_forms: len,
idx,
})
}
pub fn form_by_id(&self, id: &str) -> Result<&Form> {
for form in &self.forms {
if form.id().is_some() && form.id().unwrap() == id {
return Ok(form);
}
}
Err(Error::FormIdNotFoundError { id: id.to_owned() })
}
pub fn form_by_id_mut(&mut self, id: &str) -> Result<&mut Form> {
for form in &mut self.forms {
if form.id().is_some() && form.id().unwrap() == id {
return Ok(form);
}
}
Err(Error::FormIdNotFoundError { id: id.to_owned() })
}
fn parse_selectors(&self, selectors: &str) -> Result<Selector> {
Selector::parse(selectors).map_err(|error| Error::CssSelectorParseError {
selector: selectors.to_owned(),
reason: format!("{error:?}"),
})
}
pub fn select_first(&self, selectors: &str) -> Result<ElementRef> {
let s = self.parse_selectors(selectors)?;
self.html
.select(&s)
.next()
.ok_or_else(|| Error::CssSelectorResultEmptyError {
selector: selectors.to_owned(),
})
}
pub fn select(&self, selectors: &str) -> Result<Vec<ElementRef>> {
let selectors = self.parse_selectors(selectors)?;
Ok(self.html.select(&selectors).collect::<Vec<ElementRef>>())
}
pub fn query(&self, name: &str) -> Result<String> {
for (k, v) in self.url.query_pairs() {
if k.eq(name) {
return Ok(v.to_string());
}
}
Err(Error::UnknownQueryParamError {
query: String::from(self.url.query().unwrap_or("")),
param: String::from(name),
})
}
fn parse_forms(html: &Html, url: &Url) -> Vec<Form> {
let mut forms = Vec::new();
let selector = Selector::parse("form").unwrap();
for form_ref in html.select(&selector) {
forms.push(Form::parse(&form_ref, url.clone()));
}
forms
}
}
#[cfg(test)]
mod tests {
    use crate::input::InputType;
    use super::Page;
    use reqwest::{header::HeaderMap, Method, StatusCode, Url};

    /// Fixture: a minimal document holding three forms with ids `id_01` to `id_03`, each
    /// with one hidden input and one submit button.
    static PAGE_001: &str = r#"
<html>
<body>
<h1>Test</h1>
<form action="subpage1" id="id_01">
<input type="hidden" name="hidden" value="hidden">
<button type="submit" value="submit" name="submit">Submit</button>
</form>
<form action="subpage1" id="id_02">
<input type="hidden" name="hidden" value="hidden">
<button type="submit" value="submit" name="submit">Submit</button>
</form>
<form action="subpage1" id="id_03">
<input type="hidden" name="hidden" value="hidden">
<button type="submit" value="submit" name="submit">Submit</button>
</form>
</body>
</html>
"#;

    /// Builds a page from the fixture and checks that the metadata is stored unchanged and
    /// that all three forms are parsed and reachable by index and by id.
    #[test]
    fn parse_page() {
        const PAGE_URL: &str = "https://wikipedia.org/";

        let page = Page::build(
            Method::GET,
            Url::parse(PAGE_URL).unwrap(),
            StatusCode::OK,
            HeaderMap::new(),
            PAGE_001.to_owned(),
        );

        // Metadata and body come back exactly as handed to `build`.
        assert_eq!(page.method(), Method::GET);
        assert_eq!(*page.status(), StatusCode::OK);
        assert!(page.headers().is_empty());
        assert_eq!(*page.url(), Url::parse(PAGE_URL).unwrap());
        assert_eq!(page.text(), PAGE_001);

        // Forms are exposed by index, in the order they appear in the fixture.
        let expected_ids = ["id_01", "id_02", "id_03"];
        assert_eq!(page.forms.len(), expected_ids.len());
        for (idx, expected_id) in expected_ids.iter().enumerate() {
            assert_eq!(page.form(idx).unwrap().id(), Some(*expected_id));
        }

        // Lookup by id yields a form whose inputs were parsed.
        let second_form = page.form_by_id("id_02").unwrap();
        let hidden_input = second_form.input(InputType::Hidden, "hidden").unwrap();
        assert_eq!(hidden_input.name(), "hidden");
        assert_eq!(hidden_input.value(), Some("hidden"));
    }
}