use crate::input::{Input, InputType};
use reqwest::{Method, Url};
use scraper::{ElementRef, Html, Selector};
use std::str::FromStr;
use thiserror::Error;
/// Errors produced while querying or submitting a [`Form`].
#[derive(Debug, Error)]
pub enum Error {
/// No input of the form matched both the requested name and the requested type.
#[error("Form doesn't contain input named '{input_name}' of type '{input_type:?}'!")]
InputNotInFormError {
/// Name that was looked up.
input_name: String,
/// Input type that was looked up.
input_type: InputType,
},
}
/// Result alias whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
/// An HTML form extracted from a page, together with the inputs found inside it.
#[derive(Debug)]
pub struct Form {
// URL of the page the form was found on; relative `action` values are resolved against it.
page_url: Url,
// HTTP method used on submit; parsing only ever stores GET or POST.
method: Method,
// Raw value of the `action` attribute (empty string when the attribute is missing).
action: String,
// Value of the form's `id` attribute, if present.
id: Option<String>,
// Successfully parsed `input` elements followed by successfully parsed `button` elements.
inputs: Vec<Input>,
}
/// Everything needed to perform a form submission, as produced by `Form::submit`.
pub(crate) struct SubmitFormInfo {
/// Target URL the form data is sent to.
pub url: String,
/// HTTP method of the submission.
pub method: Method,
/// Name/value pairs to submit, in the order they were collected.
pub data: Vec<(String, String)>,
}
// Button-like input types. `Form::submit` skips these when collecting regular form data;
// only the explicitly requested submit button is added to the submitted pairs.
static BUTTONS: [InputType; 3] = [InputType::Button, InputType::Reset, InputType::Submit];
impl Form {
pub fn id(&self) -> Option<&str> {
self.id.as_deref()
}
pub fn input(&self, t: InputType, name: &str) -> Result<&Input> {
for input in &self.inputs {
if input.t() != t || name != input.name() {
continue;
}
return Ok(input);
}
Err(Error::InputNotInFormError {
input_name: name.to_owned(),
input_type: t,
})
}
pub fn input_mut(&mut self, t: InputType, name: &str) -> Result<&mut Input> {
for input in &mut self.inputs {
if input.t() != t || name != input.name() {
continue;
}
return Ok(input);
}
Err(Error::InputNotInFormError {
input_name: name.to_owned(),
input_type: t,
})
}
pub(crate) fn submit(&self, submit_button_name: Option<&str>) -> Result<SubmitFormInfo> {
let url = self.form_target_url();
let method = self.method.clone();
let mut data = Vec::new();
if let Some(submit_button_name) = submit_button_name {
let input = self.input(InputType::Submit, submit_button_name)?;
data.push((input.name().to_owned(), input.value().unwrap().to_owned()));
}
for input in &self.inputs {
if BUTTONS.contains(&input.t()) {
continue; }
if input.value().is_none() {
continue; }
if input.t() == InputType::Checkbox && input.attr("checked").is_none() {
continue; }
data.push((input.name().to_owned(), input.value().unwrap().to_owned()));
}
Ok(SubmitFormInfo { url, method, data })
}
pub(crate) fn parse(form_ref: &ElementRef, page_url: Url) -> Self {
let form = form_ref.value();
let method_s = form.attr("method").unwrap_or("GET");
let mut method = Method::from_str(&method_s.to_uppercase()).unwrap_or(Method::GET);
if method != Method::GET && method != Method::POST {
method = Method::GET;
}
let action = form
.attr("action")
.or(Some(""))
.map(|s| s.to_owned())
.unwrap();
let id = form.attr("id").map(|s| s.to_owned());
let inputs = Self::parse_form_inputs(form_ref);
Self {
page_url,
method,
action,
id,
inputs,
}
}
fn parse_form_inputs(form: &ElementRef) -> Vec<Input> {
let html = Html::parse_fragment(&form.inner_html());
let mut inputs = Vec::new();
let selector = Selector::parse("input").unwrap();
for input in html.select(&selector) {
let input = input.value();
if let Ok(input) = Input::parse(input) {
inputs.push(input)
}
}
let selector = Selector::parse("button").unwrap();
for button in html.select(&selector) {
let button = button.value();
if let Ok(button) = Input::parse(button) {
inputs.push(button)
}
}
inputs
}
fn form_target_url(&self) -> String {
if self.action.starts_with("http://") || self.action.starts_with("https://") {
return self.action.clone();
}
let mut creds = String::from(self.page_url.username());
if self.page_url.password().is_some() {
creds.push(':');
creds.push_str(self.page_url.password().unwrap());
creds.push('@')
}
let mut url = format!(
"{}://{}{}:{}",
self.page_url.scheme(),
creds,
self.page_url.host_str().unwrap_or(""),
self.page_url.port_or_known_default().unwrap(),
);
if !self.action.starts_with('/') {
if self.page_url.path().ends_with('/') {
url.push_str(&self.page_url.path());
} else {
let mut path_parts: Vec<&str> = self.page_url.path().split('/').collect();
path_parts.pop();
url.push_str(&path_parts.join("/"));
url.push('/');
}
}
url.push_str(&self.action);
url
}
}
#[cfg(test)]
mod tests {
    use reqwest::{Method, Url};
    use scraper::{Html, Selector};
    use crate::input::InputType;
    use super::{Form, Result};

    // Fixture: a GET form with one text input, one checked and one unchecked checkbox,
    // and a submit button.
    static FORM_001: &str = r#"
<html>
<body>
<form id="form_01" method="GET" action="https://www.github.com/submit_stuff">
<input name="txt" type="text" value="txt">
<input name="chk_a" type="checkbox" value="chk_a" checked>
<input name="chk_b" type="checkbox" value="chk_b">
<button name="ok" type="submit" value="ok">OK</button>
</form>
</body>
</html>"#;

    // Parses the first form of `FORM_001` as if it had been found on wikipedia.org.
    fn load_fixture_form() -> Form {
        let document = Html::parse_fragment(FORM_001);
        let form_selector = Selector::parse("form").unwrap();
        let form_ref = document.select(&form_selector).next().unwrap();
        Form::parse(&form_ref, Url::parse("https://wikipedia.org/").unwrap())
    }

    // Submits via the "ok" button, checks method and target URL, and hands back the data.
    fn submit_with_ok(form: &Form) -> Result<Vec<(String, String)>> {
        let info = form.submit(Some("ok"))?;
        assert_eq!(info.method, Method::GET);
        assert_eq!(info.url, "https://www.github.com/submit_stuff");
        Ok(info.data)
    }

    // True when `data` contains exactly the pair (`name`, `value`).
    fn has_pair(data: &[(String, String)], name: &str, value: &str) -> bool {
        data.iter()
            .any(|(n, v)| n.as_str() == name && v.as_str() == value)
    }

    #[test]
    fn parse_form() -> Result<()> {
        let form = load_fixture_form();

        assert_eq!(form.page_url, Url::parse("https://wikipedia.org/").unwrap());
        assert_eq!(form.method, Method::GET);
        assert_eq!(form.action, "https://www.github.com/submit_stuff");
        assert_eq!(form.inputs.len(), 4);
        assert_eq!(
            form.form_target_url(),
            "https://www.github.com/submit_stuff"
        );
        Ok(())
    }

    #[test]
    fn submit_checkboxes() -> Result<()> {
        let mut form = load_fixture_form();

        // Initial state: only `chk_a` is checked.
        let data = submit_with_ok(&form)?;
        assert_eq!(data.len(), 3);
        assert!(has_pair(&data, "txt", "txt"));
        assert!(has_pair(&data, "ok", "ok"));
        assert!(has_pair(&data, "chk_a", "chk_a"));
        assert!(!has_pair(&data, "chk_b", "chk_b"));

        // Checking `chk_b` adds it to the submitted data.
        form.input_mut(InputType::Checkbox, "chk_b")?
            .set_attr("checked", Some("".to_owned()));
        let data = submit_with_ok(&form)?;
        assert_eq!(data.len(), 4);
        assert!(has_pair(&data, "txt", "txt"));
        assert!(has_pair(&data, "ok", "ok"));
        assert!(has_pair(&data, "chk_a", "chk_a"));
        assert!(has_pair(&data, "chk_b", "chk_b"));

        // Unchecking both checkboxes leaves only the text input and the button.
        form.input_mut(InputType::Checkbox, "chk_a")?
            .set_attr("checked", None);
        form.input_mut(InputType::Checkbox, "chk_b")?
            .set_attr("checked", None);
        let data = submit_with_ok(&form)?;
        assert_eq!(data.len(), 2);
        assert!(has_pair(&data, "txt", "txt"));
        assert!(has_pair(&data, "ok", "ok"));
        Ok(())
    }
}