use std::collections::HashMap;
use bytes::Bytes;
use serde_json::Value;
use tracing::debug;
use scrapling_fetch::Response;
use scrapling_fetch::status_text;
pub async fn from_browser_page(
page: &playwright_rs::Page,
first_response: Option<&playwright_rs::Response>,
_final_response: Option<&playwright_rs::Response>,
meta: HashMap<String, Value>,
_captured_xhr: Vec<XhrCapture>,
) -> crate::error::Result<Response> {
let active_response = _final_response.or(first_response);
let status = active_response.map(|r| r.status()).unwrap_or(200);
let reason = active_response
.map(|r| {
let st = r.status_text();
if st.is_empty() {
status_text(status).to_owned()
} else {
st.to_owned()
}
})
.unwrap_or_else(|| status_text(status).to_owned());
let headers = match first_response {
Some(resp) => resp.all_headers().await.unwrap_or_default(),
None => HashMap::new(),
};
let encoding = extract_encoding(&headers);
let content = get_page_content(page, 20).await?;
let page_url = page.url();
let body = Bytes::from(content.into_bytes());
let cookies = match page.context() {
Ok(ctx) => ctx
.cookies(None)
.await
.map(|c| c.into_iter().map(|ck| (ck.name, ck.value)).collect())
.unwrap_or_default(),
Err(_) => HashMap::new(),
};
let response = Response::new(
&page_url,
body,
status,
Some(reason),
cookies,
headers,
HashMap::new(),
encoding,
"GET".to_owned(),
Vec::new(),
meta,
);
Ok(response)
}
/// Reads the page's content, retrying when `page.content()` fails.
///
/// * `page` - page to read from.
/// * `max_retries` - total number of attempts. A value of `0` is treated as
///   `1`, so the page is always queried at least once (previously `0` made
///   the function panic on an `unreachable!()`).
///
/// Each failed attempt except the last is followed by a 500 ms pause.
///
/// # Errors
///
/// Returns `BrowserError::Navigation` carrying the last underlying error once
/// every attempt has failed.
async fn get_page_content(
    page: &playwright_rs::Page,
    max_retries: u32,
) -> crate::error::Result<String> {
    // Guarantee one attempt so the loop below always ends in a `return`.
    let attempts = max_retries.max(1);
    let mut attempt: u32 = 1;
    loop {
        match page.content().await {
            Ok(content) => return Ok(content),
            Err(e) if attempt >= attempts => {
                return Err(crate::error::BrowserError::Navigation(format!(
                    "page.content() failed after {attempts} attempts: {e}"
                )));
            }
            Err(_) => {
                debug!(attempt = attempt, "page.content() failed, retrying");
                tokio::time::sleep(std::time::Duration::from_millis(500)).await;
                attempt += 1;
            }
        }
    }
}
/// Extracts the `charset` parameter from the `content-type` header.
///
/// The header is looked up under the exact key `content-type` first and, if
/// that is absent, under any key equal to it ignoring ASCII case. The
/// parameter name is matched case-insensitively, and surrounding whitespace
/// and single or double quotes around the value are removed.
///
/// Returns `"utf-8"` when the header is missing, carries no `charset`
/// parameter, or the parameter value is empty. The value's own letter case is
/// preserved.
fn extract_encoding(headers: &HashMap<String, String>) -> String {
    let content_type = headers.get("content-type").or_else(|| {
        headers
            .iter()
            .find(|(name, _)| name.eq_ignore_ascii_case("content-type"))
            .map(|(_, value)| value)
    });

    content_type
        .and_then(|ct| {
            ct.split(';').find_map(|part| {
                let (name, value) = part.split_once('=')?;
                if !name.trim().eq_ignore_ascii_case("charset") {
                    return None;
                }
                // Strip optional quoting, e.g. charset="utf-8".
                let value = value
                    .trim()
                    .trim_matches(|c| c == '"' || c == '\'')
                    .trim();
                // An empty value is treated as absent so the default applies.
                (!value.is_empty()).then(|| value.to_owned())
            })
        })
        .unwrap_or_else(|| "utf-8".to_owned())
}
/// Record describing one captured XHR exchange.
///
/// NOTE(review): nothing visible in this part of the file constructs or reads
/// this type (the `_captured_xhr` argument of `from_browser_page` is unused),
/// so the field descriptions below are inferred from names and types only —
/// confirm against the code that produces these values.
#[derive(Debug)]
pub struct XhrCapture {
/// URL associated with the capture (presumably the request URL).
pub url: String,
/// Numeric status code (presumably the HTTP response status).
pub status: u16,
/// Header name/value pairs (presumably the response headers).
pub headers: HashMap<String, String>,
/// Raw body bytes.
pub body: Bytes,
}