use std::sync::Arc;
use anyhow::{anyhow, Result};
use regex::Regex;
use serde_json::{json, Value};
use crate::bidi::BidiClient;
use crate::cdp::CdpClient;
use crate::detect::Engine;
use crate::session::targets::{open_bidi, open_cdp};
/// A single browser page attached through one of the two supported engines.
///
/// Values are created by [`PageSession::attach`] or
/// [`PageSession::from_bidi_cache`]; every operation dispatches on the variant.
pub enum PageSession {
/// Page driven through a [`CdpClient`]; see [`CdpPage`].
Cdp(CdpPage),
/// Page driven through a [`BidiClient`]; see [`BidiPage`].
Bidi(BidiPage),
}
/// State needed to talk to one page over CDP.
pub struct CdpPage {
/// Connection used for every command sent to this page.
pub client: CdpClient,
/// Session id returned by `attach_to_target`; passed along with each
/// page-scoped command.
pub session_id: String,
/// Id of the `page` target that was selected and attached to.
pub target_id: String,
}
/// State needed to talk to one page over BiDi.
pub struct BidiPage {
/// Reference-counted client handle; `from_bidi_cache` accepts an existing
/// one, so it may be shared with other owners.
pub client: Arc<BidiClient>,
/// Id of the browsing context selected from `browsingContext.getTree`.
pub context: String,
}
impl PageSession {
pub async fn attach(
endpoint: &str,
engine: Engine,
url_regex: Option<&str>,
) -> Result<Self> {
let pattern = url_regex.map(Regex::new).transpose()?;
match engine {
Engine::Cdp => {
let client = open_cdp(endpoint).await?;
let target_id = pick_cdp_page(&client, pattern.as_ref()).await?;
let session_id = client.attach_to_target(&target_id).await?;
Ok(PageSession::Cdp(CdpPage {
client,
session_id,
target_id,
}))
}
Engine::Bidi => {
let client = Arc::new(open_bidi(endpoint).await?);
client.session_new().await?;
let context = pick_bidi_context(&client, pattern.as_ref()).await?;
Ok(PageSession::Bidi(BidiPage { client, context }))
}
}
}
pub async fn from_bidi_cache(
client: Arc<BidiClient>,
url_regex: Option<&str>,
) -> Result<Self> {
let pattern = url_regex.map(Regex::new).transpose()?;
let context = pick_bidi_context(&client, pattern.as_ref()).await?;
Ok(PageSession::Bidi(BidiPage { client, context }))
}
pub async fn evaluate(&self, expression: &str, await_promise: bool) -> Result<Value> {
match self {
PageSession::Cdp(p) => {
let v = p
.client
.send_with_session(
"Runtime.evaluate",
json!({
"expression": expression,
"returnByValue": true,
"awaitPromise": await_promise,
}),
Some(&p.session_id),
)
.await?;
Ok(v["result"]["value"].clone())
}
PageSession::Bidi(p) => {
let _ = await_promise; let v = p.client.script_evaluate(&p.context, expression).await?;
Ok(v["result"]["value"].clone())
}
}
}
pub async fn navigate(&self, url: &str) -> Result<()> {
match self {
PageSession::Cdp(p) => {
p.client
.send_with_session(
"Page.navigate",
json!({"url": url}),
Some(&p.session_id),
)
.await?;
Ok(())
}
PageSession::Bidi(p) => {
p.client
.browsing_context_navigate(&p.context, url)
.await?;
Ok(())
}
}
}
pub async fn screenshot(&self, full_page: bool) -> Result<String> {
match self {
PageSession::Cdp(p) => {
let v = p
.client
.send_with_session(
"Page.captureScreenshot",
json!({
"format": "png",
"captureBeyondViewport": full_page,
}),
Some(&p.session_id),
)
.await?;
v["data"]
.as_str()
.map(|s| s.to_string())
.ok_or_else(|| anyhow!("no screenshot data"))
}
PageSession::Bidi(p) => {
let _ = full_page; p.client
.browsing_context_capture_screenshot(&p.context)
.await
}
}
}
pub fn engine(&self) -> Engine {
match self {
PageSession::Cdp(_) => Engine::Cdp,
PageSession::Bidi(_) => Engine::Bidi,
}
}
pub async fn close(self) {
match self {
PageSession::Cdp(p) => p.client.close().await,
PageSession::Bidi(_) => {}
}
}
}
/// Chooses the CDP target to attach to and returns its `targetId`.
///
/// Only targets whose `type` is `"page"` are considered. With a `pattern`,
/// the first page whose `url` is a string matching it wins; without one, the
/// first page wins.
async fn pick_cdp_page(client: &CdpClient, pattern: Option<&Regex>) -> Result<String> {
    let targets = client.list_targets().await?;

    let chosen = targets.iter().find(|t| {
        if t.get("type").and_then(|v| v.as_str()) != Some("page") {
            return false;
        }
        match pattern {
            // A page without a string `url` can never satisfy a pattern.
            Some(re) => t
                .get("url")
                .and_then(|v| v.as_str())
                .is_some_and(|u| re.is_match(u)),
            None => true,
        }
    });

    let Some(target) = chosen else {
        return Err(match pattern {
            Some(_) => anyhow!("no CDP page target matched URL regex"),
            None => anyhow!("no page target found"),
        });
    };

    match target.get("targetId").and_then(|v| v.as_str()) {
        Some(id) => Ok(id.to_owned()),
        None => Err(anyhow!("targetId missing from page target")),
    }
}
async fn pick_bidi_context(
client: &BidiClient,
pattern: Option<&Regex>,
) -> Result<String> {
let tree = client.send("browsingContext.getTree", json!({})).await?;
let contexts = tree
.get("contexts")
.and_then(|v| v.as_array())
.ok_or_else(|| anyhow!("no contexts in browsingContext.getTree"))?;
if let Some(re) = pattern {
for c in contexts {
let url = c.get("url").and_then(|v| v.as_str()).unwrap_or("");
if re.is_match(url) {
return c
.get("context")
.and_then(|v| v.as_str())
.map(|s| s.to_string())
.ok_or_else(|| anyhow!("no context id"));
}
}
Err(anyhow!("no BiDi context matched URL regex"))
} else {
contexts
.first()
.and_then(|c| c.get("context").and_then(|v| v.as_str()))
.map(|s| s.to_string())
.ok_or_else(|| anyhow!("no top-level browsing context"))
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use futures_util::{SinkExt, StreamExt};
    use tokio_tungstenite::tungstenite::Message;

    /// Builds the JSON description of a CDP `page` target.
    fn page(target_id: &str, url: &str) -> Value {
        json!({"targetId": target_id, "type": "page", "url": url})
    }

    /// Canned `result` payload the mock returns for a given CDP method.
    fn canned_result(method: &str, targets: &[Value]) -> Value {
        match method {
            "Target.getTargets" => json!({"targetInfos": targets.to_vec()}),
            "Target.attachToTarget" => json!({"sessionId": "S1"}),
            "Runtime.evaluate" => json!({"result": {"value": "ok"}}),
            "Page.navigate" => json!({}),
            "Page.captureScreenshot" => json!({"data": "PNGDATA"}),
            _ => json!({}),
        }
    }

    /// Starts a one-connection WebSocket server that answers CDP requests
    /// with canned results, and returns its `ws://` URL.
    async fn spawn_cdp_mock(targets: Vec<Value>) -> String {
        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
        let addr = listener.local_addr().unwrap();
        tokio::spawn(async move {
            let (stream, _) = listener.accept().await.unwrap();
            let mut ws = tokio_tungstenite::accept_async(stream).await.unwrap();
            loop {
                // Stop on close, error, or any non-text frame.
                let Some(Ok(Message::Text(text))) = ws.next().await else {
                    break;
                };
                let req: Value = serde_json::from_str(&text).unwrap();
                let id = req["id"].as_u64().unwrap();
                let result = canned_result(req["method"].as_str().unwrap_or(""), &targets);
                let resp = json!({"id": id, "result": result});
                ws.send(Message::Text(resp.to_string())).await.unwrap();
            }
        });
        format!("ws://{addr}")
    }

    /// Spawns a mock serving `targets` and attaches to it over CDP.
    async fn attach_cdp(targets: Vec<Value>, url_regex: Option<&str>) -> Result<PageSession> {
        let endpoint = spawn_cdp_mock(targets).await;
        PageSession::attach(&endpoint, Engine::Cdp, url_regex).await
    }

    /// The two-page fixture shared by the selection tests.
    fn two_pages() -> Vec<Value> {
        vec![
            page("a", "https://example.com/"),
            page("b", "https://other.test/"),
        ]
    }

    /// The single-page fixture shared by the remaining tests.
    fn one_page() -> Vec<Value> {
        vec![page("a", "https://example.com/")]
    }

    #[tokio::test]
    async fn attach_cdp_picks_first_page_when_no_regex() {
        let session = attach_cdp(two_pages(), None).await.unwrap();
        let PageSession::Cdp(p) = session else {
            panic!("expected CDP");
        };
        assert_eq!(p.target_id, "a");
        assert_eq!(p.session_id, "S1");
    }

    #[tokio::test]
    async fn attach_cdp_url_regex_selects_matching() {
        let session = attach_cdp(two_pages(), Some(r"other")).await.unwrap();
        let PageSession::Cdp(p) = session else {
            panic!("expected CDP");
        };
        assert_eq!(p.target_id, "b");
    }

    #[tokio::test]
    async fn attach_cdp_url_regex_no_match_errors() {
        // `PageSession` is not `Debug`, so `unwrap_err` is not available.
        let Err(err) = attach_cdp(one_page(), Some("nomatch")).await else {
            panic!("expected error");
        };
        assert!(err.to_string().contains("no CDP page target matched"));
    }

    #[tokio::test]
    async fn evaluate_round_trip_cdp() {
        let session = attach_cdp(one_page(), None).await.unwrap();
        let value = session.evaluate("1+1", false).await.unwrap();
        assert_eq!(value, json!("ok"));
        session.close().await;
    }

    #[tokio::test]
    async fn screenshot_round_trip_cdp() {
        let session = attach_cdp(one_page(), None).await.unwrap();
        let b64 = session.screenshot(false).await.unwrap();
        assert_eq!(b64, "PNGDATA");
        session.close().await;
    }
}