use std::sync::Arc;
use std::time::Duration;
use serde_json::{Value, json};
use tokio::time::{Instant, sleep, timeout_at};
use crate::cdp::core::CdpCore;
use crate::cdp::element::ChromiumElement;
use crate::cdp::interceptor::CdpIntercept;
use crate::cdp::listener::CdpListen;
use crate::cdp::types::{
Cookie, CookieParam, DialogInfo, DownloadInfo, GetOptions, ImageFormat, LoadMode, PageRect,
ShotOpts,
};
use crate::keys::KeyInput;
use crate::locator::{self, Query};
use crate::{Error, Result};
/// Handle to a single browser tab, backed by a shared [`CdpCore`].
///
/// Cloning only clones the `Arc`, so all clones share the same core.
#[derive(Clone)]
pub struct ChromiumTab {
// Shared per-tab state; every method on the tab goes through this core.
pub(crate) core: Arc<CdpCore>,
}
impl ChromiumTab {
pub(crate) fn new(core: Arc<CdpCore>) -> Self {
Self { core }
}
/// Forwards `d` to the shared core's `set_timeout`.
pub fn set_timeout(&self, d: Duration) {
self.core.set_timeout(d);
}
/// Builds an environment dumper from a clone of this tab handle.
pub fn dump_env(&self) -> crate::cdp::ChromiumEnvDumper {
crate::envkit::EnvDumper::new(self.clone())
}
/// Closes this tab's target and, when the core carries a browser context id,
/// disposes that context as well.
///
/// The results of both CDP calls are discarded, so this always returns `Ok(())`.
pub async fn close(&self) -> Result<()> {
    let core = &self.core;

    let close_params = json!({ "targetId": core.target_id });
    let _ = core
        .conn
        .send("Target.closeTarget", close_params, None)
        .await;

    if let Some(ctx) = &core.browser_context_id {
        let dispose_params = json!({ "browserContextId": ctx });
        let _ = core
            .conn
            .send("Target.disposeBrowserContext", dispose_params, None)
            .await;
    }

    Ok(())
}
/// Returns the value reported by the shared core's `timeout()`.
pub fn timeout(&self) -> Duration {
self.core.timeout()
}
/// Navigates to `url` once, using the core's timeout and `LoadMode::Normal`.
///
/// The outcome is stored on the core via `set_load_ok` and returned.
///
/// # Errors
/// Propagates any error from the navigation attempt; in that case the stored
/// load flag is not updated.
pub async fn get(&self, url: &str) -> Result<bool> {
    let default_timeout = self.core.timeout();
    let loaded = self
        .navigate_once(url, default_timeout, LoadMode::Normal, None)
        .await?;
    self.core.set_load_ok(loaded);
    Ok(loaded)
}
/// Navigates to `url` with per-call options, retrying on failure.
///
/// Up to `opts.retry + 1` attempts are made, sleeping `opts.interval` between
/// attempts. An attempt that returns an error counts as a failed attempt; the
/// error itself is not propagated. Retrying stops as soon as an attempt
/// succeeds, or after the first attempt when the load mode is `LoadMode::None`.
///
/// The final outcome is stored on the core via `set_load_ok` and returned.
pub async fn get_with(&self, url: &str, opts: &GetOptions) -> Result<bool> {
    let per_attempt = opts.timeout.unwrap_or_else(|| self.core.timeout());
    let load_mode = opts.load_mode.unwrap_or(LoadMode::Normal);
    let referer = opts.referer.as_deref();
    let attempts = opts.retry + 1;

    let mut loaded = false;
    for attempt in 0..attempts {
        // Evaluate inline so the attempt's `Result` is dropped before any sleep.
        loaded = matches!(
            self.navigate_once(url, per_attempt, load_mode, referer).await,
            Ok(true)
        );
        if loaded || load_mode == LoadMode::None {
            break;
        }
        let has_more = attempt + 1 < attempts;
        if has_more {
            sleep(opts.interval).await;
        }
    }

    self.core.set_load_ok(loaded);
    Ok(loaded)
}
/// Sends a single `Page.navigate` and waits for the load event selected by `mode`.
///
/// Returns `Ok(true)` when the event named by `mode.cdp_event()` arrives for
/// this tab's session before `timeout` elapses, or immediately when the mode
/// has no associated event. Returns `Ok(false)` on timeout or when the event
/// channel is closed before the event is seen.
///
/// # Errors
/// Propagates a failure of the `Page.navigate` command itself.
async fn navigate_once(
    &self,
    url: &str,
    timeout: Duration,
    mode: LoadMode,
    referer: Option<&str>,
) -> Result<bool> {
    // Subscribe before sending so an early load event is not missed.
    let mut events = self.core.conn.subscribe();
    let mut params = json!({ "url": url });
    if let Some(r) = referer {
        params["referrer"] = json!(r);
    }
    self.core.send("Page.navigate", params).await?;
    let Some(ev_name) = mode.cdp_event() else {
        return Ok(true);
    };
    let sid = self.core.session_id.clone();
    let deadline = Instant::now() + timeout;
    let wait_for_event = async {
        loop {
            match events.recv().await {
                Ok(ev) if ev.method == ev_name && ev.session_id.as_deref() == Some(&sid) => {
                    return true;
                }
                Ok(_) => continue,
                // Missed events because the receiver lagged: keep listening.
                Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
                // Channel closed: the event can never arrive, so this is not a load.
                Err(_) => return false,
            }
        }
    };
    let loaded = timeout_at(deadline, wait_for_event)
        .await
        .unwrap_or(false);
    Ok(loaded)
}
/// Sends `Page.reload` and then waits via [`wait_loaded`](Self::wait_loaded).
///
/// The outcome of the wait is discarded; only a failure of the reload command
/// itself is returned as an error.
pub async fn reload(&self) -> Result<()> {
    let no_params = json!({});
    self.core.send("Page.reload", no_params).await?;
    let _ = self.wait_loaded().await;
    Ok(())
}
/// Moves one entry back in the navigation history (`history_go(-1)`).
pub async fn back(&self) -> Result<()> {
self.history_go(-1).await
}
/// Moves one entry forward in the navigation history (`history_go(1)`).
pub async fn forward(&self) -> Result<()> {
self.history_go(1).await
}
/// Navigates `delta` entries relative to the current history position.
///
/// Reads `Page.getNavigationHistory`, and if `currentIndex + delta` names an
/// existing entry with an integer `id`, sends `Page.navigateToHistoryEntry`
/// and waits via [`wait_loaded`](Self::wait_loaded) (wait outcome discarded).
/// When the target index is out of range this is a no-op returning `Ok(())`.
///
/// # Errors
/// Propagates failures of either CDP command.
async fn history_go(&self, delta: i64) -> Result<()> {
    let history = self
        .core
        .send("Page.getNavigationHistory", json!({}))
        .await?;
    let current = history["currentIndex"].as_i64().unwrap_or(0);
    // Checked arithmetic and conversion: an overflowing or negative target
    // simply yields no entry. `Value::get` borrows the entry instead of
    // cloning the whole history array.
    let entry_id = current
        .checked_add(delta)
        .and_then(|target| usize::try_from(target).ok())
        .and_then(|target| history["entries"].get(target))
        .and_then(|entry| entry["id"].as_i64());
    if let Some(id) = entry_id {
        self.core
            .send("Page.navigateToHistoryEntry", json!({ "entryId": id }))
            .await?;
        let _ = self.wait_loaded().await;
    }
    Ok(())
}
/// Stops the current page load.
///
/// Sends `Page.stopLoading`; if that command fails, falls back to evaluating
/// `window.stop()` in the page. The fallback's result is discarded, so this
/// always returns `Ok(())`.
pub async fn stop_loading(&self) -> Result<()> {
    let cdp_failed = self
        .core
        .send("Page.stopLoading", json!({}))
        .await
        .is_err();
    if cdp_failed {
        let _ = self.run_js("window.stop()").await;
    }
    Ok(())
}
/// Evaluates `document.readyState` and returns it as a string.
///
/// A non-string result is mapped to the empty string.
pub async fn ready_state(&self) -> Result<String> {
    let value = self.run_js("document.readyState").await?;
    Ok(value.as_str().unwrap_or_default().to_owned())
}
/// Polls `document.readyState` until it equals `"complete"` or the core's
/// timeout elapses.
///
/// Returns `Ok(true)` once the page reports `"complete"`, `Ok(false)` when the
/// deadline passes first. A failed readiness query is treated like a
/// not-yet-complete page. The state is always checked at least once.
pub async fn wait_loaded(&self) -> Result<bool> {
    const POLL_INTERVAL: Duration = Duration::from_millis(80);
    let give_up_at = Instant::now() + self.core.timeout();
    loop {
        let state = self.ready_state().await.unwrap_or_default();
        if state == "complete" {
            return Ok(true);
        }
        if Instant::now() >= give_up_at {
            return Ok(false);
        }
        sleep(POLL_INTERVAL).await;
    }
}
/// Returns the load flag held by the core (`load_ok()`), which `get` and
/// `get_with` update after each navigation.
pub fn url_available(&self) -> bool {
self.core.load_ok()
}
/// Evaluates `navigator.userAgent` and returns it as a string.
///
/// A non-string result is mapped to the empty string.
pub async fn user_agent(&self) -> Result<String> {
    let value = self.run_js("navigator.userAgent").await?;
    Ok(value.as_str().unwrap_or_default().to_owned())
}
pub async fn handle_next_dialog(
&self,
accept: bool,
prompt_text: Option<&str>,
) -> Result<DialogInfo> {
let mut events = self.core.conn.subscribe();
let sid = self.core.session_id.clone();
let deadline = Instant::now() + self.core.timeout();
let info = timeout_at(deadline, async {
loop {
match events.recv().await {
Ok(ev)
if ev.method == "Page.javascriptDialogOpening"
&& ev.session_id.as_deref() == Some(&sid) =>
{
return Some(DialogInfo {
message: ev.params["message"]
.as_str()
.unwrap_or_default()
.to_string(),
dialog_type: ev.params["type"].as_str().unwrap_or_default().to_string(),
default_prompt: ev.params["defaultPrompt"]
.as_str()
.unwrap_or_default()
.to_string(),
});
}
Ok(_) => continue,
Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
Err(_) => return None,
}
}
})
.await
.ok()
.flatten()
.ok_or_else(|| Error::msg("CDP: 未捕获到对话框(需与触发动作并发)"))?;
let mut p = json!({ "accept": accept });
if let Some(t) = prompt_text {
p["promptText"] = json!(t);
}
self.core.send("Page.handleJavaScriptDialog", p).await?;
Ok(info)
}
/// Allows downloads into `dir` and records that directory on the core.
///
/// The directory is created first; a failure to create it is ignored. Then
/// `Browser.setDownloadBehavior` is sent with `behavior: "allow"`, the
/// directory's display string, and `eventsEnabled: true`.
///
/// # Errors
/// Propagates a failure of the CDP command; the core's download directory is
/// only updated after the command succeeds.
pub async fn set_download_path(&self, dir: impl AsRef<std::path::Path>) -> Result<()> {
    let target = dir.as_ref();
    let _ = std::fs::create_dir_all(target);

    let params = json!({ "behavior": "allow", "downloadPath": target.display().to_string(), "eventsEnabled": true });
    self.core
        .send("Browser.setDownloadBehavior", params)
        .await?;

    self.core.set_download_dir(target.to_path_buf());
    Ok(())
}
/// Creates a `ChromiumDownloads` helper sharing this tab's core.
pub fn downloads(&self) -> crate::cdp::ChromiumDownloads {
crate::cdp::ChromiumDownloads::new(self.core.clone())
}
/// Waits up to `timeout` for a download on this tab's session to finish.
///
/// Tracks `Page.downloadWillBegin` (fills in URL and suggested filename) and
/// `Page.downloadProgress` (finishes on state `completed` or `canceled`).
/// If the deadline passes after a download has begun, the partially filled
/// info (state `inProgress`) is returned as `Ok`.
///
/// # Errors
/// Returns an error when the wait ends without any download event having
/// been captured.
pub async fn wait_download(&self, timeout: Duration) -> Result<DownloadInfo> {
let mut events = self.core.conn.subscribe();
let sid = self.core.session_id.clone();
let deadline = Instant::now() + timeout;
// Filled in incrementally by the event loop below and read again afterwards.
let mut info = DownloadInfo::default();
let done = timeout_at(deadline, async {
loop {
match events.recv().await {
// Only events that belong to this tab's session are inspected.
Ok(ev) if ev.session_id.as_deref() == Some(&sid) => match ev.method.as_str() {
"Page.downloadWillBegin" => {
info.url = ev.params["url"].as_str().unwrap_or_default().to_string();
info.suggested_filename = ev.params["suggestedFilename"]
.as_str()
.unwrap_or_default()
.to_string();
info.state = "inProgress".into();
}
"Page.downloadProgress" => {
// Both terminal states end the wait; other progress states keep looping.
if ev.params["state"].as_str() == Some("completed") {
info.state = "completed".into();
return true;
} else if ev.params["state"].as_str() == Some("canceled") {
info.state = "canceled".into();
return true;
}
}
_ => {}
},
Ok(_) => continue,
// The receiver lagged and skipped events: keep listening.
Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
// Any other receive error ends the wait as "not done".
Err(_) => return false,
}
}
})
.await
// Timeout elapsed: treat as "not done".
.unwrap_or(false);
// An empty state means no `downloadWillBegin`/terminal progress event was seen.
if !done && info.state.is_empty() {
return Err(Error::msg("CDP: 等待下载超时(未捕获下载事件)"));
}
Ok(info)
}
/// Captures cookies plus the current page's `localStorage` as a JSON value.
///
/// The result has the shape `{ "cookies": ..., "origins": [ ... ] }`, where
/// the single `origins` entry is what the in-page script returns
/// (`{origin, localStorage}`), or `{}` when the script result is not a
/// parseable JSON string.
///
/// # Errors
/// Propagates failures of `Storage.getCookies` or of the script evaluation.
pub async fn storage_state(&self) -> Result<Value> {
    let cookies = {
        let reply = self.core.send("Storage.getCookies", json!({})).await?;
        reply["cookies"].clone()
    };

    let raw = self
        .run_js(
            "(function(){ try { const o={}; for (let i=0;i<localStorage.length;i++){ const k=localStorage.key(i); o[k]=localStorage.getItem(k);} return JSON.stringify({origin: location.origin, localStorage: o}); } catch(e){ return '{}'; } })()",
        )
        .await?;

    // The script returns a JSON string; anything else falls back to `{}`.
    let origin_entry = match raw.as_str().map(serde_json::from_str::<Value>) {
        Some(Ok(parsed)) => parsed,
        _ => json!({}),
    };

    Ok(json!({ "cookies": cookies, "origins": [origin_entry] }))
}
/// Applies a state value shaped like the output of
/// [`storage_state`](Self::storage_state).
///
/// If `state["cookies"]` is an array it is sent via `Storage.setCookies`.
/// For every entry of `state["origins"]` that has a `localStorage` object,
/// its key/value pairs are written into the current page's `localStorage`
/// by an injected script; failures of that script are ignored. The entry's
/// `origin` field is not consulted here.
///
/// # Errors
/// Propagates a failure of `Storage.setCookies`.
pub async fn apply_storage_state(&self, state: &Value) -> Result<()> {
    if let Some(cookies) = state["cookies"].as_array() {
        let params = json!({ "cookies": cookies });
        self.core.send("Storage.setCookies", params).await?;
    }

    for entry in state["origins"].as_array().into_iter().flatten() {
        let Some(storage) = entry["localStorage"].as_object() else {
            continue;
        };
        // Serialised straight into the script as an object literal.
        let pairs = serde_json::to_string(storage).unwrap_or_else(|_| "{}".into());
        let script = format!(
            "(function(d){{ try {{ for (const k in d) localStorage.setItem(k, d[k]); }} catch(e){{}} }})({pairs})"
        );
        let _ = self.run_js(&script).await;
    }

    Ok(())
}
/// Evaluates `expression` through the core's `eval_value` and returns the
/// resulting JSON value.
pub async fn run_js(&self, expression: &str) -> Result<Value> {
self.core.eval_value(expression).await
}
/// Evaluates `document.title` and returns it as a string.
///
/// A non-string result is mapped to the empty string.
pub async fn title(&self) -> Result<String> {
    let value = self.run_js("document.title").await?;
    Ok(value.as_str().unwrap_or_default().to_owned())
}
/// Evaluates `location.href` and returns it as a string.
///
/// A non-string result is mapped to the empty string.
pub async fn url(&self) -> Result<String> {
    let value = self.run_js("location.href").await?;
    Ok(value.as_str().unwrap_or_default().to_owned())
}
/// Evaluates `document.documentElement.outerHTML` and returns it as a string.
///
/// A non-string result is mapped to the empty string.
pub async fn html(&self) -> Result<String> {
    let value = self.run_js("document.documentElement.outerHTML").await?;
    Ok(value.as_str().unwrap_or_default().to_owned())
}
pub async fn ele(&self, selector: &str) -> Result<ChromiumElement> {
match self
.core
.eval_handle(&doc_query_expr(selector, true))
.await?
{
Some(oid) => Ok(ChromiumElement::new(self.core.clone(), oid)),
None => Err(Error::ElementNotFound(selector.to_string())),
}
}
/// Polls for the first element matching `selector` until it appears or the
/// deadline passes.
///
/// `timeout` defaults to the core's timeout when `None`. The query always
/// runs at least once.
///
/// # Errors
/// Returns `Error::ElementNotFound` carrying the selector once the deadline
/// has passed without a match, and propagates any evaluation failure
/// immediately.
pub async fn wait_ele(
    &self,
    selector: &str,
    timeout: Option<Duration>,
) -> Result<ChromiumElement> {
    const POLL_INTERVAL: Duration = Duration::from_millis(100);
    let budget = timeout.unwrap_or_else(|| self.core.timeout());
    let give_up_at = Instant::now() + budget;
    let expr = doc_query_expr(selector, true);
    loop {
        match self.core.eval_handle(&expr).await? {
            Some(oid) => return Ok(ChromiumElement::new(self.core.clone(), oid)),
            None if Instant::now() >= give_up_at => {
                return Err(Error::ElementNotFound(selector.to_string()));
            }
            None => sleep(POLL_INTERVAL).await,
        }
    }
}
/// Returns every element matching `selector` in the document.
///
/// Yields an empty vector when the query produces no array handle.
///
/// # Errors
/// Propagates failures of the evaluation or of resolving the array's
/// object ids.
pub async fn eles(&self, selector: &str) -> Result<Vec<ChromiumElement>> {
    let expr = doc_query_expr(selector, false);
    let found = match self.core.eval_handle(&expr).await? {
        None => Vec::new(),
        Some(array_handle) => {
            let mut elements = Vec::new();
            for oid in self.core.array_object_ids(&array_handle).await? {
                elements.push(ChromiumElement::new(self.core.clone(), oid));
            }
            elements
        }
    };
    Ok(found)
}
pub async fn ele_text(&self, selector: &str) -> Result<Option<String>> {
match self.ele(selector).await {
Ok(el) => Ok(Some(el.text().await?)),
Err(Error::ElementNotFound(_)) => Ok(None),
Err(e) => Err(e),
}
}
/// Parses the page's current HTML (see [`html`](Self::html)) into a
/// `StaticElement`.
pub async fn s_root(&self) -> Result<crate::static_element::StaticElement> {
    let markup = self.html().await?;
    crate::static_element::StaticElement::parse(&markup)
}
/// Parses the page's current HTML and looks up `selector` in the parsed
/// document via `StaticElement::ele`.
pub async fn s_ele(&self, selector: &str) -> Result<crate::static_element::StaticElement> {
    let markup = self.html().await?;
    let root = crate::static_element::StaticElement::parse(&markup)?;
    root.ele(selector)
}
/// Parses the page's current HTML and looks up `selector` in the parsed
/// document via `StaticElement::eles`.
pub async fn s_eles(
    &self,
    selector: &str,
) -> Result<Vec<crate::static_element::StaticElement>> {
    let markup = self.html().await?;
    let root = crate::static_element::StaticElement::parse(&markup)?;
    root.eles(selector)
}
/// Finds the element matching `selector` and returns its content frame.
///
/// # Errors
/// Propagates the element lookup error and any failure of `content_frame`.
pub async fn get_frame(&self, selector: &str) -> Result<crate::cdp::frame::ChromiumFrame> {
    let host = self.ele(selector).await?;
    host.content_frame().await
}
/// Reports whether at least one element matches `selector`.
///
/// # Errors
/// Propagates evaluation failures.
pub async fn exists(&self, selector: &str) -> Result<bool> {
    let expr = doc_query_expr(selector, true);
    let handle = self.core.eval_handle(&expr).await?;
    Ok(handle.is_some())
}
/// Finds the first element matching `selector` and clicks it.
pub async fn click(&self, selector: &str) -> Result<()> {
    let target = self.ele(selector).await?;
    target.click().await
}
/// Finds the first element matching `selector` and passes `text` to its
/// `input` method.
pub async fn input(&self, selector: &str, text: &str) -> Result<()> {
    let target = self.ele(selector).await?;
    target.input(text).await
}
/// Dispatches a `mouseMoved` event at `(x, y)` with button `"none"`.
// NOTE(review): the two trailing integers are presumably the pressed-buttons
// mask and the click count — confirm against `CdpCore::dispatch_mouse`.
pub async fn mouse_move(&self, x: f64, y: f64) -> Result<()> {
self.core
.dispatch_mouse("mouseMoved", x, y, "none", 0, 0)
.await
}
/// Dispatches a `mousePressed` event at `(x, y)` with button `"left"`.
// NOTE(review): the two trailing integers are presumably the pressed-buttons
// mask and the click count — confirm against `CdpCore::dispatch_mouse`.
pub async fn mouse_down(&self, x: f64, y: f64) -> Result<()> {
self.core
.dispatch_mouse("mousePressed", x, y, "left", 1, 1)
.await
}
/// Dispatches a `mouseReleased` event at `(x, y)` with button `"left"`.
// NOTE(review): the two trailing integers are presumably the pressed-buttons
// mask and the click count — confirm against `CdpCore::dispatch_mouse`.
pub async fn mouse_up(&self, x: f64, y: f64) -> Result<()> {
self.core
.dispatch_mouse("mouseReleased", x, y, "left", 0, 1)
.await
}
/// Dispatches a `mouseMoved` event at `(x, y)`; unlike `mouse_move`, the
/// first trailing integer is `1` instead of `0`.
// NOTE(review): that value presumably marks the left button as held while
// moving — confirm against `CdpCore::dispatch_mouse`.
pub async fn mouse_drag(&self, x: f64, y: f64) -> Result<()> {
self.core
.dispatch_mouse("mouseMoved", x, y, "none", 1, 0)
.await
}
/// Synchronous counterpart of `mouse_move`: same arguments, sent through
/// `dispatch_mouse_fire`.
// NOTE(review): presumably fire-and-forget (no reply awaited) — confirm
// against `CdpCore::dispatch_mouse_fire`.
pub fn mouse_move_fast(&self, x: f64, y: f64) -> Result<()> {
self.core
.dispatch_mouse_fire("mouseMoved", x, y, "none", 0, 0)
}
/// Synchronous counterpart of `mouse_drag`: same arguments, sent through
/// `dispatch_mouse_fire`.
// NOTE(review): presumably fire-and-forget (no reply awaited) — confirm
// against `CdpCore::dispatch_mouse_fire`.
pub fn mouse_drag_fast(&self, x: f64, y: f64) -> Result<()> {
self.core
.dispatch_mouse_fire("mouseMoved", x, y, "none", 1, 0)
}
/// Sends an `Input.dispatchMouseEvent` of type `mouseWheel` at `(x, y)` with
/// the given scroll deltas.
///
/// # Errors
/// Propagates a failure of the CDP command.
pub async fn wheel_at(&self, x: f64, y: f64, delta_x: f64, delta_y: f64) -> Result<()> {
    let params = json!({ "type": "mouseWheel", "x": x, "y": y, "deltaX": delta_x, "deltaY": delta_y });
    self.core
        .send("Input.dispatchMouseEvent", params)
        .await
        .map(|_| ())
}
/// Same as `wheel_at`, with the pointer fixed at `(10.0, 10.0)`.
pub async fn wheel(&self, delta_x: f64, delta_y: f64) -> Result<()> {
self.wheel_at(10.0, 10.0, delta_x, delta_y).await
}
/// Scrolls the window by `(x, y)` by evaluating `window.scrollBy(x,y)`.
///
/// # Errors
/// Propagates a failure of the script evaluation.
pub async fn scroll_by(&self, x: f64, y: f64) -> Result<()> {
    let script = format!("window.scrollBy({x},{y})");
    self.run_js(&script).await.map(|_| ())
}
/// Forwards `key` to the core's `press_key`.
pub async fn press_key(&self, key: &str) -> Result<()> {
self.core.press_key(key).await
}
/// Forwards the key list to the core's `key_combo`.
pub async fn key_combo(&self, keys: &[&str]) -> Result<()> {
self.core.key_combo(keys).await
}
/// Replays a sequence of inputs in order: `KeyInput::Text` parts go through
/// the core's `insert_text`, `KeyInput::Key` parts through `press_key`.
///
/// # Errors
/// Stops at, and returns, the first failing step.
pub async fn type_keys(&self, parts: &[KeyInput]) -> Result<()> {
    let core = &self.core;
    for part in parts.iter() {
        match part {
            KeyInput::Key(name) => core.press_key(name).await?,
            KeyInput::Text(text) => core.insert_text(text).await?,
        }
    }
    Ok(())
}
pub async fn screenshot_bytes(&self) -> Result<Vec<u8>> {
let r = self
.core
.send("Page.captureScreenshot", json!({ "format": "png" }))
.await?;
let data = r["data"]
.as_str()
.ok_or_else(|| Error::msg("CDP: 无截图数据"))?;
crate::util::base64_decode(data).ok_or_else(|| Error::msg("CDP: 截图 base64 解码失败"))
}
pub async fn screenshot_full_bytes(&self) -> Result<Vec<u8>> {
let r = self
.core
.send(
"Page.captureScreenshot",
json!({ "format": "png", "captureBeyondViewport": true }),
)
.await?;
let data = r["data"]
.as_str()
.ok_or_else(|| Error::msg("CDP: 无整页截图数据"))?;
crate::util::base64_decode(data).ok_or_else(|| Error::msg("CDP: 整页截图 base64 解码失败"))
}
/// Captures a PNG screenshot and writes it to `path`, returning that path.
///
/// `full_page` selects between `screenshot_full_bytes` and
/// `screenshot_bytes`. The parent directory is created first; a failure to
/// create it is ignored and surfaces, if at all, through the write.
///
/// # Errors
/// Propagates capture failures and the file write error.
pub async fn get_screenshot(
    &self,
    path: impl AsRef<std::path::Path>,
    full_page: bool,
) -> Result<std::path::PathBuf> {
    let target = path.as_ref().to_path_buf();
    let png = match full_page {
        true => self.screenshot_full_bytes().await?,
        false => self.screenshot_bytes().await?,
    };
    if let Some(parent) = target.parent() {
        let _ = std::fs::create_dir_all(parent);
    }
    std::fs::write(&target, &png)?;
    Ok(target)
}
pub async fn screenshot(&self, opts: &ShotOpts) -> Result<Vec<u8>> {
let mut params = json!({ "format": opts.format.cdp_format() });
if let (Some(q), ImageFormat::Jpeg) = (opts.quality, opts.format) {
params["quality"] = json!(q);
}
if let Some(((l, t), (r, b))) = opts.region {
params["clip"] = json!({ "x": l, "y": t, "width": (r - l).max(1.0), "height": (b - t).max(1.0), "scale": 1 });
params["captureBeyondViewport"] = json!(true);
} else if opts.full_page {
params["captureBeyondViewport"] = json!(true);
}
let resp = self.core.send("Page.captureScreenshot", params).await?;
let data = resp["data"]
.as_str()
.ok_or_else(|| Error::msg("CDP: 无截图数据"))?;
crate::util::base64_decode(data).ok_or_else(|| Error::msg("CDP: 截图 base64 解码失败"))
}
/// Captures a PNG and returns the reply's `data` field as-is (base64 text).
///
/// `full_page` adds `captureBeyondViewport: true`. When the reply carries no
/// string `data` field, the empty string is returned rather than an error.
///
/// # Errors
/// Propagates a failure of the CDP command.
pub async fn screenshot_base64(&self, full_page: bool) -> Result<String> {
    let params = if full_page {
        let mut with_full = json!({ "format": "png" });
        with_full["captureBeyondViewport"] = json!(true);
        with_full
    } else {
        json!({ "format": "png" })
    };
    let reply = self.core.send("Page.captureScreenshot", params).await?;
    let encoded = reply["data"].as_str().unwrap_or_default();
    Ok(encoded.to_owned())
}
pub async fn size(&self) -> Result<(f64, f64)> {
let v = self.run_js("[innerWidth, innerHeight]").await?;
let f = |i: usize| v.get(i).and_then(Value::as_f64).unwrap_or(0.0);
Ok((f(0), f(1)))
}
pub async fn page_size(&self) -> Result<(f64, f64)> {
let v = self
.run_js("[document.documentElement.scrollWidth, document.documentElement.scrollHeight]")
.await?;
let f = |i: usize| v.get(i).and_then(Value::as_f64).unwrap_or(0.0);
Ok((f(0), f(1)))
}
/// Collects window size, page size, scroll offsets and device pixel ratio in
/// a single script evaluation.
///
/// Any field that is missing or not numeric in the script result is reported
/// as `0.0`.
pub async fn rect(&self) -> Result<PageRect> {
    let metrics = self
        .run_js(
            "({ww:innerWidth,wh:innerHeight,pw:document.documentElement.scrollWidth,\
             ph:document.documentElement.scrollHeight,sx:scrollX,sy:scrollY,dpr:devicePixelRatio})",
        )
        .await?;
    let num = |key: &str| metrics[key].as_f64().unwrap_or(0.0);
    let rect = PageRect {
        window_width: num("ww"),
        window_height: num("wh"),
        page_width: num("pw"),
        page_height: num("ph"),
        scroll_x: num("sx"),
        scroll_y: num("sy"),
        device_pixel_ratio: num("dpr"),
    };
    Ok(rect)
}
/// Returns the raw JSON cookie objects from `Storage.getCookies`.
///
/// Yields an empty vector when the reply has no `cookies` array.
pub async fn get_cookies(&self) -> Result<Vec<Value>> {
    let reply = self.core.send("Storage.getCookies", json!({})).await?;
    let list = match reply["cookies"].as_array() {
        Some(items) => items.clone(),
        None => Vec::new(),
    };
    Ok(list)
}
/// Returns the cookies from `Storage.getCookies` as typed [`Cookie`] values.
///
/// Missing or mistyped fields fall back to defaults: empty string for text
/// fields, `-1.0` for `expires`, `false` for the boolean flags. Yields an
/// empty vector when the reply has no `cookies` array.
///
/// # Errors
/// Propagates a failure of the CDP command.
pub async fn cookies(&self) -> Result<Vec<Cookie>> {
    let reply = self.core.send("Storage.getCookies", json!({})).await?;
    let text = |c: &Value, key: &str| c[key].as_str().unwrap_or_default().to_owned();
    let flag = |c: &Value, key: &str| c[key].as_bool().unwrap_or(false);
    // Iterate the borrowed array directly; no need to deep-clone it first.
    let cookies: Vec<Cookie> = reply["cookies"]
        .as_array()
        .into_iter()
        .flatten()
        .map(|c| Cookie {
            name: text(c, "name"),
            value: text(c, "value"),
            domain: text(c, "domain"),
            path: text(c, "path"),
            expires: c["expires"].as_f64().unwrap_or(-1.0),
            http_only: flag(c, "httpOnly"),
            secure: flag(c, "secure"),
        })
        .collect();
    Ok(cookies)
}
/// Sends the given cookies via `Storage.setCookies`.
///
/// `name` and `value` are always sent; `url`, `domain`, `path`, `secure`,
/// `httpOnly` and `expires` are only included when set on the parameter.
///
/// # Errors
/// Propagates a failure of the CDP command.
pub async fn set_cookies(&self, cookies: Vec<CookieParam>) -> Result<()> {
    let mut payload: Vec<Value> = Vec::with_capacity(cookies.len());
    for cookie in &cookies {
        let mut entry = json!({ "name": cookie.name, "value": cookie.value });
        if let Some(url) = &cookie.url {
            entry["url"] = json!(url);
        }
        if let Some(domain) = &cookie.domain {
            entry["domain"] = json!(domain);
        }
        if let Some(path) = &cookie.path {
            entry["path"] = json!(path);
        }
        if let Some(secure) = cookie.secure {
            entry["secure"] = json!(secure);
        }
        if let Some(http_only) = cookie.http_only {
            entry["httpOnly"] = json!(http_only);
        }
        if let Some(expires) = cookie.expires {
            entry["expires"] = json!(expires);
        }
        payload.push(entry);
    }

    let params = json!({ "cookies": payload });
    self.core.send("Storage.setCookies", params).await?;
    Ok(())
}
/// Runs OCR on the image element matching `selector`.
///
/// The OCR engine lives in a function-local static and is initialised on
/// first use. Image bytes come from the element's `src` when it is a `data:`
/// URL (the part after the first comma is base64-decoded; a decode failure
/// yields empty bytes), otherwise from a screenshot of the element.
///
/// # Errors
/// Propagates engine initialisation, element lookup, screenshot and
/// recognition failures.
#[cfg(feature = "ocr")]
pub async fn ocr_image(&self, selector: &str) -> Result<String> {
    use tokio::sync::OnceCell;
    static OCR: OnceCell<crate::ocr::Ocr> = OnceCell::const_new();

    let engine = OCR.get_or_try_init(crate::ocr::Ocr::new).await?;
    let element = self.ele(selector).await?;
    let src = element.attr("src").await.ok().flatten().unwrap_or_default();

    let image = if src.starts_with("data:") {
        let payload = src.split_once(',').map_or("", |(_, tail)| tail);
        crate::util::base64_decode(payload).unwrap_or_default()
    } else {
        element.screenshot_bytes().await?
    };

    engine.recognize(&image)
}
/// Creates a `ChromiumActions` helper sharing this tab's core.
pub fn actions(&self) -> crate::cdp::ChromiumActions {
crate::cdp::ChromiumActions::new(self.core.clone())
}
/// Creates a `ChromiumScroll` helper sharing this tab's core.
pub fn scroll(&self) -> crate::cdp::ChromiumScroll {
crate::cdp::ChromiumScroll::new(self.core.clone())
}
/// Creates a `ChromiumSetTab` helper sharing this tab's core.
pub fn set(&self) -> crate::cdp::ChromiumSetTab {
crate::cdp::ChromiumSetTab::new(self.core.clone())
}
/// Creates a `ChromiumWait` helper sharing this tab's core.
pub fn wait(&self) -> crate::cdp::ChromiumWait {
crate::cdp::ChromiumWait::new(self.core.clone())
}
/// Runs `f` on up to `max_pages` consecutive pages, advancing by clicking the
/// element matching `next_selector`, and collects the results in order.
///
/// After each call to `f` the "next" element is looked up. Pagination stops
/// when it cannot be found, when it is not clickable (a failed clickability
/// check counts as not clickable), or when `max_pages` is reached. After a
/// click, the page is given time to load (`wait_loaded`, outcome ignored)
/// plus a fixed 300 ms pause.
///
/// # Errors
/// Propagates the first error returned by `f` or by the click.
pub async fn paginate<F, Fut, T>(
    &self,
    next_selector: &str,
    max_pages: usize,
    mut f: F,
) -> Result<Vec<T>>
where
    F: FnMut(&ChromiumTab) -> Fut,
    Fut: std::future::Future<Output = Result<T>>,
{
    let mut collected = Vec::new();
    for _ in 0..max_pages {
        let page_result = f(self).await?;
        collected.push(page_result);

        let Ok(next) = self.ele(next_selector).await else {
            break;
        };
        let clickable = next.is_clickable().await.unwrap_or(false);
        if !clickable {
            break;
        }

        next.click().await?;
        let _ = self.wait_loaded().await;
        sleep(Duration::from_millis(300)).await;
    }
    Ok(collected)
}
/// Creates a `ChromiumConsole` helper sharing this tab's core.
pub fn console(&self) -> crate::cdp::ChromiumConsole {
crate::cdp::ChromiumConsole::new(self.core.clone())
}
/// Creates a `ChromiumWsListener` helper sharing this tab's core.
pub fn websocket(&self) -> crate::cdp::ChromiumWsListener {
crate::cdp::ChromiumWsListener::new(self.core.clone())
}
/// Creates a `ChromiumScreencast` helper sharing this tab's core.
pub fn screencast(&self) -> crate::cdp::ChromiumScreencast {
crate::cdp::ChromiumScreencast::new(self.core.clone())
}
/// Creates a `CdpListen` helper sharing this tab's core.
pub fn listen(&self) -> CdpListen {
CdpListen::new(self.core.clone())
}
/// Creates a `CdpIntercept` helper sharing this tab's core.
pub fn intercept(&self) -> CdpIntercept {
CdpIntercept::new(self.core.clone())
}
}
/// Builds the JavaScript expression that runs `selector` against `document`.
///
/// The selector is parsed by `locator::parse` into a CSS or XPath query and
/// embedded as a JSON string literal (an empty literal if serialisation
/// fails). With `single` the expression yields the first match
/// (`querySelector`, or `document.evaluate` with result type `9`); otherwise
/// it yields an array of all matches (`querySelectorAll`, or a snapshot
/// evaluation with result type `7` copied into an array).
pub(crate) fn doc_query_expr(selector: &str, single: bool) -> String {
    let (encoded, is_css) = match locator::parse(selector) {
        Query::Css(sel) => (serde_json::to_string(&sel), true),
        Query::Xpath(xp) => (serde_json::to_string(&xp), false),
    };
    let literal = encoded.unwrap_or_else(|_| "\"\"".into());

    match (is_css, single) {
        (true, true) => format!("document.querySelector({literal})"),
        (true, false) => format!("Array.from(document.querySelectorAll({literal}))"),
        (false, true) => {
            format!("document.evaluate({literal}, document, null, 9, null).singleNodeValue")
        }
        (false, false) => format!(
            "(function(){{ const it=document.evaluate({literal}, document, null, 7, null); \
             const a=[]; for (let i=0;i<it.snapshotLength;i++) a.push(it.snapshotItem(i)); return a; }})()"
        ),
    }
}
#[cfg(test)]
mod tests {
    use super::doc_query_expr;

    /// CSS selectors (explicit `css:` prefix or bare) become `querySelector*` calls.
    #[test]
    fn css_query_expr() {
        let single = doc_query_expr("css:h1", true);
        assert_eq!(single, "document.querySelector(\"h1\")");

        let multiple = doc_query_expr("#a .b", false);
        assert_eq!(multiple, "Array.from(document.querySelectorAll(\"#a .b\"))");
    }

    /// `xpath:` selectors become a `document.evaluate` call returning a single node.
    #[test]
    fn xpath_query_expr() {
        let expr = doc_query_expr("xpath://div[@id=\"x\"]", true);
        assert!(expr.starts_with("document.evaluate("));
        assert!(expr.contains("singleNodeValue"));
    }
}