use std::cell::OnceCell;
use std::collections::HashMap;
use bytes::Bytes;
use serde_json::Value;
use scrapling::selector::Selector;
use crate::status::status_text;
/// An HTTP response together with request metadata and a lazily parsed view of its body.
///
/// Instances are assembled by [`Response::new`]; the parsed document is created on
/// the first call to [`Response::selector`] and reused afterwards.
pub struct Response {
/// Numeric HTTP status code.
pub status: u16,
/// Reason phrase; `Response::new` falls back to `status_text(status)` when none is supplied.
pub reason: String,
/// Cookies as a name -> value map.
pub cookies: HashMap<String, String>,
/// Response headers as a name -> value map.
pub headers: HashMap<String, String>,
/// Headers associated with the originating request, stored as passed to `Response::new`.
pub request_headers: HashMap<String, String>,
/// Other responses attached to this one.
/// NOTE(review): presumably the redirect chain that led here — confirm with callers;
/// `build_response_async` always passes an empty list.
pub history: Vec<Response>,
/// Character encoding label supplied by the caller.
/// NOTE(review): the methods in this file decode the body as lossy UTF-8 and never read this field.
pub encoding: String,
/// HTTP method of the request, as supplied by the caller.
pub method: String,
/// Caller-supplied metadata, stored untouched.
pub meta: HashMap<String, Value>,
/// Raw response body bytes.
pub body: Bytes,
// Kept private; read through `Response::url`.
url: String,
// Cache for the parsed document, filled on the first `selector()` call.
parsed: OnceCell<Selector>,
}
// NOTE(review): this manual `Send` impl carries no recorded justification.
// Apart from `parsed: OnceCell<Selector>` (and nested `Response`s in `history`),
// every field is a std, `bytes` or `serde_json` type, so the impl presumably exists
// because `Selector` is not `Send` on its own — TODO confirm against scrapling.
// SAFETY (unverified): sound only if a `Selector` cached in `parsed`, and anything it
// shares with values handed out by `selector()`, can be moved to another thread safely.
unsafe impl Send for Response {}
impl Response {
    /// Assembles a response from its already-extracted parts.
    ///
    /// When `reason` is `None`, the reason phrase is taken from
    /// `status_text(status)`. The parsed document is not built here; it is
    /// created lazily by [`Response::selector`].
    // One argument per stored field, hence the long parameter list.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        url: &str,
        body: Bytes,
        status: u16,
        reason: Option<String>,
        cookies: HashMap<String, String>,
        headers: HashMap<String, String>,
        request_headers: HashMap<String, String>,
        encoding: String,
        method: String,
        history: Vec<Response>,
        meta: HashMap<String, Value>,
    ) -> Self {
        let reason = match reason {
            Some(phrase) => phrase,
            None => status_text(status).to_owned(),
        };
        let url = url.to_owned();
        Self {
            status,
            reason,
            cookies,
            headers,
            request_headers,
            history,
            encoding,
            method,
            meta,
            body,
            url,
            parsed: OnceCell::new(),
        }
    }

    /// Returns the URL this response was created with.
    pub fn url(&self) -> &str {
        self.url.as_str()
    }

    /// Decodes the body as UTF-8, replacing invalid sequences with U+FFFD.
    fn lossy_body(&self) -> std::borrow::Cow<'_, str> {
        String::from_utf8_lossy(&self.body)
    }

    /// Returns the parsed document for the body, building it on first use.
    ///
    /// The body is decoded as lossy UTF-8 and handed to
    /// `Selector::from_html_with_url` together with the response URL; the
    /// result is cached, so later calls return the same `Selector`.
    pub fn selector(&self) -> &Selector {
        self.parsed.get_or_init(|| {
            let markup = self.lossy_body();
            Selector::from_html_with_url(&markup, &self.url)
        })
    }

    /// Runs `query` through the parsed document's `css` method.
    pub fn css(&self, query: &str) -> scrapling::selector::Selectors {
        let document = self.selector();
        document.css(query)
    }

    /// Forwards to the parsed document's `find_by_text`.
    ///
    /// `partial`, `case_sensitive` and `clean_match` are passed through
    /// unchanged; their meaning is defined by `Selector::find_by_text`.
    pub fn find_by_text(
        &self,
        text: &str,
        partial: bool,
        case_sensitive: bool,
        clean_match: bool,
    ) -> scrapling::selector::Selectors {
        let document = self.selector();
        document.find_by_text(text, partial, case_sensitive, clean_match)
    }

    /// Returns the result of the parsed document's `text` method.
    pub fn text(&self) -> scrapling::TextHandler {
        let document = self.selector();
        document.text()
    }

    /// Resolves `relative` through the parsed document's `urljoin`.
    pub fn urljoin(&self, relative: &str) -> String {
        let document = self.selector();
        document.urljoin(relative)
    }

    /// `true` when the status code is in the 2xx range.
    pub fn is_success(&self) -> bool {
        matches!(self.status, 200..=299)
    }

    /// `true` when the status code is in the 3xx range.
    pub fn is_redirect(&self) -> bool {
        matches!(self.status, 300..=399)
    }

    /// `true` when the status code is in the 4xx range.
    pub fn is_client_error(&self) -> bool {
        matches!(self.status, 400..=499)
    }

    /// `true` when the status code is in the 5xx range.
    pub fn is_server_error(&self) -> bool {
        matches!(self.status, 500..=599)
    }

    /// Resolves `relative` exactly like [`Response::urljoin`].
    pub fn follow_url(&self, relative: &str) -> String {
        self.selector().urljoin(relative)
    }

    /// Passes the lossy-UTF-8 body to `Convertor::to_markdown` and returns the result.
    pub fn to_markdown(&self) -> String {
        let markup = self.lossy_body();
        scrapling::shell::Convertor::to_markdown(&markup)
    }

    /// Passes the lossy-UTF-8 body to `Convertor::to_text` and returns the result.
    pub fn to_text(&self) -> String {
        let markup = self.lossy_body();
        scrapling::shell::Convertor::to_text(&markup)
    }
}
/// Compact debug view: only `status`, `url` and `method` are printed; the body,
/// headers, cookies and other fields are left out.
impl std::fmt::Debug for Response {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut view = f.debug_struct("Response");
        view.field("status", &self.status);
        view.field("url", &self.url);
        view.field("method", &self.method);
        view.finish()
    }
}
/// User-facing form: `<STATUS URL>`, e.g. `<200 https://example.com/>`.
impl std::fmt::Display for Response {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { status, url, .. } = self;
        write!(f, "<{} {}>", status, url)
    }
}
/// Extracts the `charset` parameter from the `content-type` header.
///
/// The header is looked up under the lowercase key `content-type`. The
/// parameter name is matched case-insensitively and surrounding quotes are
/// removed from the value, so `Charset="ISO-8859-1"` yields `ISO-8859-1`.
///
/// Returns `"utf-8"` when the header is absent, carries no `charset`
/// parameter, or the parameter's value is empty.
fn extract_encoding(headers: &HashMap<String, String>) -> String {
    headers
        .get("content-type")
        .and_then(|content_type| {
            content_type.split(';').find_map(|part| {
                // Segments without `=` (such as the media type itself) are skipped.
                let (name, value) = part.split_once('=')?;
                if !name.trim().eq_ignore_ascii_case("charset") {
                    return None;
                }
                // Values may arrive as a quoted string; drop the quotes.
                let value = value.trim().trim_matches(|c| c == '"' || c == '\'');
                // An empty value carries no information; fall through to the default.
                (!value.is_empty()).then(|| value.to_owned())
            })
        })
        .unwrap_or_else(|| "utf-8".to_owned())
}
/// Converts a `wreq::Response` into a crate [`Response`], reading the full body.
///
/// Status, reason phrase, headers, cookies and URL are copied out before the
/// body is awaited, because `bytes()` consumes the underlying response.
/// Header values for which `to_str()` fails are skipped, and when a header or
/// cookie name occurs more than once the last occurrence wins. The resulting
/// `history` is always empty.
///
/// # Errors
///
/// Propagates the error from `resp.bytes().await` when the body cannot be read.
pub(crate) async fn build_response_async(
    resp: wreq::Response,
    request_headers: HashMap<String, String>,
    method: &str,
    meta: HashMap<String, Value>,
) -> crate::error::Result<Response> {
    let code = resp.status();
    let status = code.as_u16();
    let reason = code.canonical_reason().map(str::to_owned);

    let mut headers: HashMap<String, String> = HashMap::new();
    for (name, value) in resp.headers().iter() {
        if let Ok(text) = value.to_str() {
            headers.insert(name.as_str().to_owned(), text.to_owned());
        }
    }

    let mut cookies: HashMap<String, String> = HashMap::new();
    for cookie in resp.cookies() {
        cookies.insert(cookie.name().to_owned(), cookie.value().to_owned());
    }

    let encoding = extract_encoding(&headers);
    let url = resp.uri().to_string();

    // Must come last: reading the body takes ownership of `resp`.
    let body_bytes = resp.bytes().await?;

    let response = Response::new(
        &url,
        body_bytes,
        status,
        reason,
        cookies,
        headers,
        request_headers,
        encoding,
        method.to_owned(),
        Vec::new(),
        meta,
    );
    Ok(response)
}