use std::sync::Arc;
use anyhow::{anyhow, Result};
use regex::Regex;
use serde_json::{json, Value};
use crate::bidi::BidiClient;
use crate::cdp::CdpClient;
use crate::detect::Engine;
use crate::session::targets::{open_bidi, open_cdp};
/// A page attached over one of the two supported protocols.
///
/// Every method on this type matches on the variant and forwards the call to
/// the corresponding client.
pub enum PageSession {
/// Page driven through a [`CdpClient`] session.
Cdp(CdpPage),
/// Page driven through a [`BidiClient`] browsing context.
Bidi(BidiPage),
}
/// CDP-side state for one attached page target.
pub struct CdpPage {
/// Connection over which every command for this page is sent.
pub client: CdpClient,
/// Value returned by `attach_to_target`; passed as the session on each
/// `send_with_session` call.
pub session_id: String,
/// `targetId` of the page target this session was attached to.
pub target_id: String,
}
/// BiDi-side state for one browsing context.
pub struct BidiPage {
/// Reference-counted client handle; `PageSession::from_bidi_cache` accepts
/// an already-constructed `Arc`, so the same client may back several pages.
pub client: Arc<BidiClient>,
/// Id of the browsing context that commands are addressed to.
pub context: String,
}
impl PageSession {
pub async fn attach(endpoint: &str, engine: Engine, url_regex: Option<&str>) -> Result<Self> {
let pattern = url_regex.map(Regex::new).transpose()?;
match engine {
Engine::Cdp => {
let client = open_cdp(endpoint).await?;
let target_id = pick_cdp_page(&client, pattern.as_ref()).await?;
let session_id = client.attach_to_target(&target_id).await?;
Ok(PageSession::Cdp(CdpPage {
client,
session_id,
target_id,
}))
}
Engine::Bidi => {
let client = Arc::new(open_bidi(endpoint).await?);
client.session_new().await?;
let context = pick_bidi_context(&client, pattern.as_ref()).await?;
Ok(PageSession::Bidi(BidiPage { client, context }))
}
}
}
pub async fn from_bidi_cache(client: Arc<BidiClient>, url_regex: Option<&str>) -> Result<Self> {
let pattern = url_regex.map(Regex::new).transpose()?;
let context = pick_bidi_context(&client, pattern.as_ref()).await?;
Ok(PageSession::Bidi(BidiPage { client, context }))
}
pub async fn attach_for_origin(endpoint: &str, engine: Engine, origin: &str) -> Result<Self> {
let want =
url::Url::parse(origin).map_err(|e| anyhow!("invalid origin URL `{origin}`: {e}"))?;
let origin_root = origin_root_url(&want);
match engine {
Engine::Cdp => {
let client = open_cdp(endpoint).await?;
let target_id = match find_cdp_target_for_origin(&client, &want).await? {
Some(id) => id,
None => create_cdp_tab(&client, &origin_root).await?,
};
let session_id = client.attach_to_target(&target_id).await?;
Ok(PageSession::Cdp(CdpPage {
client,
session_id,
target_id,
}))
}
Engine::Bidi => {
let client = Arc::new(open_bidi(endpoint).await?);
client.session_new().await?;
let context = match find_bidi_context_for_origin(&client, &want).await? {
Some(c) => c,
None => create_bidi_tab(&client, &origin_root).await?,
};
Ok(PageSession::Bidi(BidiPage { client, context }))
}
}
}
pub async fn evaluate(&self, expression: &str, await_promise: bool) -> Result<Value> {
match self {
PageSession::Cdp(p) => {
let v = p
.client
.send_with_session(
"Runtime.evaluate",
json!({
"expression": expression,
"returnByValue": true,
"awaitPromise": await_promise,
}),
Some(&p.session_id),
)
.await?;
Ok(v["result"]["value"].clone())
}
PageSession::Bidi(p) => {
let _ = await_promise; let v = p.client.script_evaluate(&p.context, expression).await?;
Ok(v["result"]["value"].clone())
}
}
}
pub async fn navigate(&self, url: &str) -> Result<()> {
match self {
PageSession::Cdp(p) => {
p.client
.send_with_session("Page.navigate", json!({"url": url}), Some(&p.session_id))
.await?;
Ok(())
}
PageSession::Bidi(p) => {
p.client.browsing_context_navigate(&p.context, url).await?;
Ok(())
}
}
}
pub async fn screenshot(&self, full_page: bool) -> Result<String> {
match self {
PageSession::Cdp(p) => {
let v = p
.client
.send_with_session(
"Page.captureScreenshot",
json!({
"format": "png",
"captureBeyondViewport": full_page,
}),
Some(&p.session_id),
)
.await?;
v["data"]
.as_str()
.map(|s| s.to_string())
.ok_or_else(|| anyhow!("no screenshot data"))
}
PageSession::Bidi(p) => {
let _ = full_page; p.client
.browsing_context_capture_screenshot(&p.context)
.await
}
}
}
pub fn engine(&self) -> Engine {
match self {
PageSession::Cdp(_) => Engine::Cdp,
PageSession::Bidi(_) => Engine::Bidi,
}
}
pub async fn close(self) {
match self {
PageSession::Cdp(p) => p.client.close().await,
PageSession::Bidi(_) => {}
}
}
}
/// Selects the CDP page target to attach to and returns its `targetId`.
///
/// Only targets whose `type` is `"page"` are candidates. With a `pattern`,
/// the first candidate whose `url` matches is taken; without one, the first
/// candidate is taken.
///
/// # Errors
/// Fails if listing targets fails, no candidate qualifies, or the chosen
/// target has no string `targetId`.
async fn pick_cdp_page(client: &CdpClient, pattern: Option<&Regex>) -> Result<String> {
    let targets = client.list_targets().await?;

    let mut chosen = None;
    for target in targets.iter() {
        if target.get("type").and_then(|v| v.as_str()) != Some("page") {
            continue;
        }
        let accepted = match pattern {
            Some(re) => target
                .get("url")
                .and_then(|v| v.as_str())
                .is_some_and(|u| re.is_match(u)),
            None => true,
        };
        if accepted {
            chosen = Some(target);
            break;
        }
    }

    let Some(page) = chosen else {
        // The message depends on whether a filter was in play.
        return Err(match pattern {
            Some(_) => anyhow!("no CDP page target matched URL regex"),
            None => anyhow!("no page target found"),
        });
    };

    match page.get("targetId").and_then(|v| v.as_str()) {
        Some(id) => Ok(id.to_string()),
        None => Err(anyhow!("targetId missing from page target")),
    }
}
async fn pick_bidi_context(client: &BidiClient, pattern: Option<&Regex>) -> Result<String> {
let tree = client.send("browsingContext.getTree", json!({})).await?;
let contexts = tree
.get("contexts")
.and_then(|v| v.as_array())
.ok_or_else(|| anyhow!("no contexts in browsingContext.getTree"))?;
if let Some(re) = pattern {
for c in contexts {
let url = c.get("url").and_then(|v| v.as_str()).unwrap_or("");
if re.is_match(url) {
return c
.get("context")
.and_then(|v| v.as_str())
.map(|s| s.to_string())
.ok_or_else(|| anyhow!("no context id"));
}
}
Err(anyhow!("no BiDi context matched URL regex"))
} else {
contexts
.first()
.and_then(|c| c.get("context").and_then(|v| v.as_str()))
.map(|s| s.to_string())
.ok_or_else(|| anyhow!("no top-level browsing context"))
}
}
/// Returns `true` when `a` and `b` agree on scheme, host and effective port.
///
/// The port comparison uses `port_or_known_default`, so an omitted port and
/// the scheme's known default compare equal.
pub(crate) fn same_origin(a: &url::Url, b: &url::Url) -> bool {
    if a.scheme() != b.scheme() {
        return false;
    }
    if a.host_str() != b.host_str() {
        return false;
    }
    a.port_or_known_default() == b.port_or_known_default()
}
/// Renders the root URL (`scheme://host[:port]/`) for the origin of `u`.
///
/// Path, query and fragment are dropped. The port is written only when
/// `Url::port` reports one; a URL without a host yields an empty host part.
pub(crate) fn origin_root_url(u: &url::Url) -> String {
    let mut root = format!("{}://{}", u.scheme(), u.host_str().unwrap_or(""));
    if let Some(port) = u.port() {
        root.push(':');
        root.push_str(&port.to_string());
    }
    root.push('/');
    root
}
/// Looks for an existing CDP page target that is same-origin with `want`.
///
/// Returns the `targetId` of the first `"page"` target whose `url` parses and
/// matches, or `None` when there is none. Targets with a missing or
/// unparsable URL, or without a string `targetId`, are skipped.
///
/// # Errors
/// Fails only if listing targets fails.
async fn find_cdp_target_for_origin(client: &CdpClient, want: &url::Url) -> Result<Option<String>> {
    let targets = client.list_targets().await?;
    for target in targets.iter() {
        if target.get("type").and_then(|v| v.as_str()) != Some("page") {
            continue;
        }
        let Some(raw) = target.get("url").and_then(|v| v.as_str()) else {
            continue;
        };
        let Ok(parsed) = url::Url::parse(raw) else {
            continue;
        };
        if !same_origin(&parsed, want) {
            continue;
        }
        if let Some(id) = target.get("targetId").and_then(|v| v.as_str()) {
            return Ok(Some(id.to_string()));
        }
    }
    Ok(None)
}
/// Opens a new target at `url` via `Target.createTarget` and returns its `targetId`.
///
/// # Errors
/// Fails if the command fails or the reply carries no string `targetId`.
async fn create_cdp_tab(client: &CdpClient, url: &str) -> Result<String> {
    let params = json!({ "url": url });
    let reply = client.send("Target.createTarget", params).await?;
    match reply.get("targetId").and_then(|x| x.as_str()) {
        Some(id) => Ok(id.to_owned()),
        None => Err(anyhow!("Target.createTarget did not return targetId")),
    }
}
/// Looks for an existing BiDi browsing context that is same-origin with `want`.
///
/// Scans the `contexts` array of `browsingContext.getTree` and returns the id
/// of the first entry whose `url` parses and matches. A reply without a
/// `contexts` array is treated as "nothing open" and yields `None`.
///
/// # Errors
/// Fails only if the `browsingContext.getTree` command fails.
async fn find_bidi_context_for_origin(
    client: &BidiClient,
    want: &url::Url,
) -> Result<Option<String>> {
    let tree = client.send("browsingContext.getTree", json!({})).await?;
    // Iterate the reply in place; only the matching id needs to be owned.
    let found = tree
        .get("contexts")
        .and_then(|v| v.as_array())
        .into_iter()
        .flatten()
        .find_map(|c| {
            let u = c.get("url").and_then(|v| v.as_str())?;
            let parsed = url::Url::parse(u).ok()?;
            if same_origin(&parsed, want) {
                c.get("context")
                    .and_then(|v| v.as_str())
                    .map(|s| s.to_string())
            } else {
                None
            }
        });
    Ok(found)
}
/// Creates a new tab via `browsingContext.create`, navigates it to `url`, and
/// returns the new context id.
///
/// If the navigation fails, the freshly created tab is closed on a
/// best-effort basis before the error is returned, since the caller never
/// receives the context id and could not clean it up itself.
///
/// # Errors
/// Fails if creation fails, the reply has no string `context`, or the
/// navigation fails.
async fn create_bidi_tab(client: &BidiClient, url: &str) -> Result<String> {
    let v = client
        .send("browsingContext.create", json!({ "type": "tab" }))
        .await?;
    let ctx = v
        .get("context")
        .and_then(|x| x.as_str())
        .ok_or_else(|| anyhow!("browsingContext.create did not return context"))?
        .to_string();

    let navigated = client.browsing_context_navigate(&ctx, url).await;
    if navigated.is_err() {
        // A failure to close is ignored: the navigation error is the one that
        // matters to the caller.
        let _ = client
            .send("browsingContext.close", json!({ "context": &ctx }))
            .await;
    }
    navigated?;
    Ok(ctx)
}
#[cfg(test)]
mod tests {
    use super::*;
    use futures_util::{SinkExt, StreamExt};
    use tokio_tungstenite::tungstenite::Message;

    /// Starts a single-connection WebSocket server that answers the CDP
    /// methods used by `PageSession` with canned results, and returns its
    /// `ws://` address. `targets` is served as the `Target.getTargets` reply.
    async fn spawn_cdp_mock(targets: Vec<Value>) -> String {
        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
        let addr = listener.local_addr().unwrap();
        tokio::spawn(async move {
            let (stream, _) = listener.accept().await.unwrap();
            let mut ws = tokio_tungstenite::accept_async(stream).await.unwrap();
            // Stops on the first non-text frame, error, or end of stream.
            while let Some(Ok(Message::Text(raw))) = ws.next().await {
                let request: Value = serde_json::from_str(&raw).unwrap();
                let id = request["id"].as_u64().unwrap();
                let result = match request["method"].as_str().unwrap_or("") {
                    "Target.getTargets" => json!({"targetInfos": targets.clone()}),
                    "Target.attachToTarget" => json!({"sessionId": "S1"}),
                    "Target.createTarget" => json!({"targetId": "NEW"}),
                    "Runtime.evaluate" => json!({"result": {"value": "ok"}}),
                    "Page.navigate" => json!({}),
                    "Page.captureScreenshot" => json!({"data": "PNGDATA"}),
                    _ => json!({}),
                };
                let reply = json!({"id": id, "result": result}).to_string();
                ws.send(Message::Text(reply)).await.unwrap();
            }
        });
        format!("ws://{addr}")
    }

    /// Builds a `"page"` target entry for the mock's target list.
    fn page(target_id: &str, url: &str) -> Value {
        json!({"targetId": target_id, "type": "page", "url": url})
    }

    /// Unwraps the CDP variant, failing the test for anything else.
    fn expect_cdp(session: PageSession) -> CdpPage {
        let PageSession::Cdp(cdp) = session else {
            panic!("expected CDP");
        };
        cdp
    }

    #[test]
    fn same_origin_basic() {
        let base = url::Url::parse("https://example.com/path?q=1").unwrap();
        let same_host = url::Url::parse("https://example.com/other").unwrap();
        let other_host = url::Url::parse("https://other.test/path").unwrap();
        let other_scheme = url::Url::parse("http://example.com/").unwrap();
        assert!(same_origin(&base, &same_host));
        assert!(!same_origin(&base, &other_host));
        assert!(!same_origin(&base, &other_scheme));
    }

    #[test]
    fn origin_root_strips_path_and_default_port() {
        let default_port = url::Url::parse("https://example.com/foo/bar?x=1#z").unwrap();
        assert_eq!(origin_root_url(&default_port), "https://example.com/");
        let explicit_port = url::Url::parse("http://localhost:8080/foo").unwrap();
        assert_eq!(origin_root_url(&explicit_port), "http://localhost:8080/");
    }

    #[tokio::test]
    async fn attach_for_origin_reuses_matching_tab() {
        let endpoint = spawn_cdp_mock(vec![
            page("a", "https://other.test/x"),
            page("b", "https://example.com/login"),
        ])
        .await;
        let session =
            PageSession::attach_for_origin(&endpoint, Engine::Cdp, "https://example.com/api/v1")
                .await
                .unwrap();
        assert_eq!(expect_cdp(session).target_id, "b");
    }

    #[tokio::test]
    async fn attach_for_origin_creates_tab_when_no_match() {
        let endpoint = spawn_cdp_mock(vec![page("a", "https://other.test/")]).await;
        let session =
            PageSession::attach_for_origin(&endpoint, Engine::Cdp, "https://example.com/api")
                .await
                .unwrap();
        assert_eq!(expect_cdp(session).target_id, "NEW");
    }

    #[tokio::test]
    async fn attach_cdp_picks_first_page_when_no_regex() {
        let endpoint = spawn_cdp_mock(vec![
            page("a", "https://example.com/"),
            page("b", "https://other.test/"),
        ])
        .await;
        let session = PageSession::attach(&endpoint, Engine::Cdp, None)
            .await
            .unwrap();
        let cdp = expect_cdp(session);
        assert_eq!(cdp.target_id, "a");
        assert_eq!(cdp.session_id, "S1");
    }

    #[tokio::test]
    async fn attach_cdp_url_regex_selects_matching() {
        let endpoint = spawn_cdp_mock(vec![
            page("a", "https://example.com/"),
            page("b", "https://other.test/"),
        ])
        .await;
        let session = PageSession::attach(&endpoint, Engine::Cdp, Some(r"other"))
            .await
            .unwrap();
        assert_eq!(expect_cdp(session).target_id, "b");
    }

    #[tokio::test]
    async fn attach_cdp_url_regex_no_match_errors() {
        let endpoint = spawn_cdp_mock(vec![page("a", "https://example.com/")]).await;
        let Err(err) = PageSession::attach(&endpoint, Engine::Cdp, Some("nomatch")).await else {
            panic!("expected error");
        };
        assert!(err.to_string().contains("no CDP page target matched"));
    }

    #[tokio::test]
    async fn evaluate_round_trip_cdp() {
        let endpoint = spawn_cdp_mock(vec![page("a", "https://example.com/")]).await;
        let session = PageSession::attach(&endpoint, Engine::Cdp, None)
            .await
            .unwrap();
        let value = session.evaluate("1+1", false).await.unwrap();
        assert_eq!(value, json!("ok"));
        session.close().await;
    }

    #[tokio::test]
    async fn screenshot_round_trip_cdp() {
        let endpoint = spawn_cdp_mock(vec![page("a", "https://example.com/")]).await;
        let session = PageSession::attach(&endpoint, Engine::Cdp, None)
            .await
            .unwrap();
        let data = session.screenshot(false).await.unwrap();
        assert_eq!(data, "PNGDATA");
        session.close().await;
    }
}