1use std::sync::Arc;
9
10use anyhow::{anyhow, Result};
11use regex::Regex;
12use serde_json::{json, Value};
13
14use crate::bidi::BidiClient;
15use crate::cdp::CdpClient;
16use crate::detect::Engine;
17use crate::session::targets::{open_bidi, open_cdp};
18
/// A page-level automation session, backed by one of the two supported protocol engines.
///
/// Each variant wraps the per-engine state needed to address a single page.
pub enum PageSession {
    /// Session that talks to the page through a [`CdpClient`]; see [`CdpPage`].
    Cdp(CdpPage),
    /// Session that talks to the page through a [`BidiClient`]; see [`BidiPage`].
    Bidi(BidiPage),
}
28
/// State for a page reached over CDP.
pub struct CdpPage {
    /// Client connection used to send commands.
    pub client: CdpClient,
    /// Session identifier returned when attaching to the target; it is passed along with
    /// every page-scoped command.
    pub session_id: String,
    /// Identifier of the page target this session is attached to.
    pub target_id: String,
}
34
/// State for a page reached over BiDi.
pub struct BidiPage {
    /// Client connection, reference-counted so it can be shared with other holders.
    pub client: Arc<BidiClient>,
    /// Identifier of the browsing context (the `context` field reported by the browser)
    /// that commands are addressed to.
    pub context: String,
}
39
40impl PageSession {
41 pub async fn attach(endpoint: &str, engine: Engine, url_regex: Option<&str>) -> Result<Self> {
47 let pattern = url_regex.map(Regex::new).transpose()?;
48 match engine {
49 Engine::Cdp => {
50 let client = open_cdp(endpoint).await?;
51 let target_id = pick_cdp_page(&client, pattern.as_ref()).await?;
52 let session_id = client.attach_to_target(&target_id).await?;
53 Ok(PageSession::Cdp(CdpPage {
54 client,
55 session_id,
56 target_id,
57 }))
58 }
59 Engine::Bidi => {
60 let client = Arc::new(open_bidi(endpoint).await?);
61 client.session_new().await?;
62 let context = pick_bidi_context(&client, pattern.as_ref()).await?;
63 Ok(PageSession::Bidi(BidiPage { client, context }))
64 }
65 }
66 }
67
68 pub async fn from_bidi_cache(client: Arc<BidiClient>, url_regex: Option<&str>) -> Result<Self> {
74 let pattern = url_regex.map(Regex::new).transpose()?;
75 let context = pick_bidi_context(&client, pattern.as_ref()).await?;
76 Ok(PageSession::Bidi(BidiPage { client, context }))
77 }
78
79 pub async fn attach_for_origin(endpoint: &str, engine: Engine, origin: &str) -> Result<Self> {
90 let want =
91 url::Url::parse(origin).map_err(|e| anyhow!("invalid origin URL `{origin}`: {e}"))?;
92 let origin_root = origin_root_url(&want);
93 match engine {
94 Engine::Cdp => {
95 let client = open_cdp(endpoint).await?;
96 let target_id = match find_cdp_target_for_origin(&client, &want).await? {
97 Some(id) => id,
98 None => create_cdp_tab(&client, &origin_root).await?,
99 };
100 let session_id = client.attach_to_target(&target_id).await?;
101 Ok(PageSession::Cdp(CdpPage {
102 client,
103 session_id,
104 target_id,
105 }))
106 }
107 Engine::Bidi => {
108 let client = Arc::new(open_bidi(endpoint).await?);
109 client.session_new().await?;
110 let context = match find_bidi_context_for_origin(&client, &want).await? {
111 Some(c) => c,
112 None => create_bidi_tab(&client, &origin_root).await?,
113 };
114 Ok(PageSession::Bidi(BidiPage { client, context }))
115 }
116 }
117 }
118
119 pub async fn evaluate(&self, expression: &str, await_promise: bool) -> Result<Value> {
125 match self {
126 PageSession::Cdp(p) => {
127 let v = p
128 .client
129 .send_with_session(
130 "Runtime.evaluate",
131 json!({
132 "expression": expression,
133 "returnByValue": true,
134 "awaitPromise": await_promise,
135 }),
136 Some(&p.session_id),
137 )
138 .await?;
139 Ok(v["result"]["value"].clone())
140 }
141 PageSession::Bidi(p) => {
142 let _ = await_promise; let v = p.client.script_evaluate(&p.context, expression).await?;
144 Ok(v["result"]["value"].clone())
145 }
146 }
147 }
148
149 pub async fn navigate(&self, url: &str) -> Result<()> {
151 match self {
152 PageSession::Cdp(p) => {
153 p.client
154 .send_with_session("Page.navigate", json!({"url": url}), Some(&p.session_id))
155 .await?;
156 Ok(())
157 }
158 PageSession::Bidi(p) => {
159 p.client.browsing_context_navigate(&p.context, url).await?;
160 Ok(())
161 }
162 }
163 }
164
165 pub async fn screenshot(&self, full_page: bool) -> Result<String> {
167 match self {
168 PageSession::Cdp(p) => {
169 let v = p
170 .client
171 .send_with_session(
172 "Page.captureScreenshot",
173 json!({
174 "format": "png",
175 "captureBeyondViewport": full_page,
176 }),
177 Some(&p.session_id),
178 )
179 .await?;
180 v["data"]
181 .as_str()
182 .map(|s| s.to_string())
183 .ok_or_else(|| anyhow!("no screenshot data"))
184 }
185 PageSession::Bidi(p) => {
186 let _ = full_page; p.client
188 .browsing_context_capture_screenshot(&p.context)
189 .await
190 }
191 }
192 }
193
194 pub fn engine(&self) -> Engine {
196 match self {
197 PageSession::Cdp(_) => Engine::Cdp,
198 PageSession::Bidi(_) => Engine::Bidi,
199 }
200 }
201
202 pub async fn close(self) {
205 match self {
206 PageSession::Cdp(p) => p.client.close().await,
207 PageSession::Bidi(_) => {}
208 }
209 }
210}
211
212async fn pick_cdp_page(client: &CdpClient, pattern: Option<&Regex>) -> Result<String> {
213 let targets = client.list_targets().await?;
214 let mut pages = targets
215 .iter()
216 .filter(|t| t.get("type").and_then(|v| v.as_str()) == Some("page"));
217 let pick = if let Some(re) = pattern {
218 pages
219 .find(|t| {
220 t.get("url")
221 .and_then(|v| v.as_str())
222 .is_some_and(|u| re.is_match(u))
223 })
224 .ok_or_else(|| anyhow!("no CDP page target matched URL regex"))?
225 } else {
226 pages
227 .next()
228 .ok_or_else(|| anyhow!("no page target found"))?
229 };
230 pick.get("targetId")
231 .and_then(|v| v.as_str())
232 .map(|s| s.to_string())
233 .ok_or_else(|| anyhow!("targetId missing from page target"))
234}
235
236async fn pick_bidi_context(client: &BidiClient, pattern: Option<&Regex>) -> Result<String> {
237 let tree = client.send("browsingContext.getTree", json!({})).await?;
238 let contexts = tree
239 .get("contexts")
240 .and_then(|v| v.as_array())
241 .ok_or_else(|| anyhow!("no contexts in browsingContext.getTree"))?;
242 if let Some(re) = pattern {
243 for c in contexts {
244 let url = c.get("url").and_then(|v| v.as_str()).unwrap_or("");
245 if re.is_match(url) {
246 return c
247 .get("context")
248 .and_then(|v| v.as_str())
249 .map(|s| s.to_string())
250 .ok_or_else(|| anyhow!("no context id"));
251 }
252 }
253 Err(anyhow!("no BiDi context matched URL regex"))
254 } else {
255 contexts
256 .first()
257 .and_then(|c| c.get("context").and_then(|v| v.as_str()))
258 .map(|s| s.to_string())
259 .ok_or_else(|| anyhow!("no top-level browsing context"))
260 }
261}
262
263pub(crate) fn same_origin(a: &url::Url, b: &url::Url) -> bool {
265 a.scheme() == b.scheme()
266 && a.host_str() == b.host_str()
267 && a.port_or_known_default() == b.port_or_known_default()
268}
269
270pub(crate) fn origin_root_url(u: &url::Url) -> String {
272 let scheme = u.scheme();
273 let host = u.host_str().unwrap_or("");
274 match (u.port(), u.port_or_known_default()) {
275 (Some(p), _) => format!("{scheme}://{host}:{p}/"),
277 (None, _) => format!("{scheme}://{host}/"),
278 }
279}
280
281async fn find_cdp_target_for_origin(client: &CdpClient, want: &url::Url) -> Result<Option<String>> {
282 let targets = client.list_targets().await?;
283 Ok(targets
284 .iter()
285 .filter(|t| t.get("type").and_then(|v| v.as_str()) == Some("page"))
286 .find_map(|t| {
287 let u = t.get("url").and_then(|v| v.as_str())?;
288 let parsed = url::Url::parse(u).ok()?;
289 if same_origin(&parsed, want) {
290 t.get("targetId")
291 .and_then(|v| v.as_str())
292 .map(|s| s.to_string())
293 } else {
294 None
295 }
296 }))
297}
298
299async fn create_cdp_tab(client: &CdpClient, url: &str) -> Result<String> {
300 let v = client
301 .send("Target.createTarget", json!({ "url": url }))
302 .await?;
303 v.get("targetId")
304 .and_then(|x| x.as_str())
305 .map(|s| s.to_string())
306 .ok_or_else(|| anyhow!("Target.createTarget did not return targetId"))
307}
308
309async fn find_bidi_context_for_origin(
310 client: &BidiClient,
311 want: &url::Url,
312) -> Result<Option<String>> {
313 let tree = client.send("browsingContext.getTree", json!({})).await?;
314 let contexts = tree
315 .get("contexts")
316 .and_then(|v| v.as_array())
317 .cloned()
318 .unwrap_or_default();
319 Ok(contexts.iter().find_map(|c| {
320 let u = c.get("url").and_then(|v| v.as_str())?;
321 let parsed = url::Url::parse(u).ok()?;
322 if same_origin(&parsed, want) {
323 c.get("context")
324 .and_then(|v| v.as_str())
325 .map(|s| s.to_string())
326 } else {
327 None
328 }
329 }))
330}
331
332async fn create_bidi_tab(client: &BidiClient, url: &str) -> Result<String> {
333 let v = client
334 .send("browsingContext.create", json!({ "type": "tab" }))
335 .await?;
336 let ctx = v
337 .get("context")
338 .and_then(|x| x.as_str())
339 .ok_or_else(|| anyhow!("browsingContext.create did not return context"))?
340 .to_string();
341 client.browsing_context_navigate(&ctx, url).await?;
342 Ok(ctx)
343}
344
#[cfg(test)]
mod tests {
    use super::*;
    use futures_util::{SinkExt, StreamExt};
    use tokio_tungstenite::tungstenite::Message;

    /// Starts a WebSocket server that accepts one connection and answers each CDP request
    /// with a canned result; `targets` is what `Target.getTargets` reports.
    /// Returns the server's `ws://` address.
    async fn spawn_cdp_mock(targets: Vec<Value>) -> String {
        let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
        let addr = listener.local_addr().unwrap();
        tokio::spawn(async move {
            let (stream, _) = listener.accept().await.unwrap();
            let mut ws = tokio_tungstenite::accept_async(stream).await.unwrap();
            while let Some(Ok(Message::Text(raw))) = ws.next().await {
                let request: Value = serde_json::from_str(&raw).unwrap();
                let id = request["id"].as_u64().unwrap();
                let result = match request["method"].as_str().unwrap_or("") {
                    "Target.getTargets" => json!({"targetInfos": targets.clone()}),
                    "Target.attachToTarget" => json!({"sessionId": "S1"}),
                    "Target.createTarget" => json!({"targetId": "NEW"}),
                    "Runtime.evaluate" => json!({"result": {"value": "ok"}}),
                    "Page.navigate" => json!({}),
                    "Page.captureScreenshot" => json!({"data": "PNGDATA"}),
                    _ => json!({}),
                };
                let reply = json!({"id": id, "result": result});
                ws.send(Message::Text(reply.to_string())).await.unwrap();
            }
        });
        format!("ws://{addr}")
    }

    /// Builds the JSON description of a `page` target.
    fn page(id: &str, url: &str) -> Value {
        json!({"targetId": id, "type": "page", "url": url})
    }

    /// Unwraps the CDP variant, panicking for any other engine.
    fn expect_cdp(session: PageSession) -> CdpPage {
        match session {
            PageSession::Cdp(p) => p,
            PageSession::Bidi(_) => panic!("expected CDP"),
        }
    }

    #[test]
    fn same_origin_basic() {
        let parse = |s: &str| url::Url::parse(s).unwrap();
        let base = parse("https://example.com/path?q=1");
        assert!(same_origin(&base, &parse("https://example.com/other")));
        assert!(!same_origin(&base, &parse("https://other.test/path")));
        assert!(!same_origin(&base, &parse("http://example.com/")));
    }

    #[test]
    fn origin_root_strips_path_and_default_port() {
        let cases = [
            ("https://example.com/foo/bar?x=1#z", "https://example.com/"),
            ("http://localhost:8080/foo", "http://localhost:8080/"),
        ];
        for (input, expected) in cases {
            let parsed = url::Url::parse(input).unwrap();
            assert_eq!(origin_root_url(&parsed), expected);
        }
    }

    #[tokio::test]
    async fn attach_for_origin_reuses_matching_tab() {
        let url = spawn_cdp_mock(vec![
            page("a", "https://other.test/x"),
            page("b", "https://example.com/login"),
        ])
        .await;
        let session =
            PageSession::attach_for_origin(&url, Engine::Cdp, "https://example.com/api/v1")
                .await
                .unwrap();
        assert_eq!(expect_cdp(session).target_id, "b");
    }

    #[tokio::test]
    async fn attach_for_origin_creates_tab_when_no_match() {
        let url = spawn_cdp_mock(vec![page("a", "https://other.test/")]).await;
        let session = PageSession::attach_for_origin(&url, Engine::Cdp, "https://example.com/api")
            .await
            .unwrap();
        assert_eq!(expect_cdp(session).target_id, "NEW");
    }

    #[tokio::test]
    async fn attach_cdp_picks_first_page_when_no_regex() {
        let url = spawn_cdp_mock(vec![
            page("a", "https://example.com/"),
            page("b", "https://other.test/"),
        ])
        .await;
        let session = PageSession::attach(&url, Engine::Cdp, None).await.unwrap();
        let attached = expect_cdp(session);
        assert_eq!(attached.target_id, "a");
        assert_eq!(attached.session_id, "S1");
    }

    #[tokio::test]
    async fn attach_cdp_url_regex_selects_matching() {
        let url = spawn_cdp_mock(vec![
            page("a", "https://example.com/"),
            page("b", "https://other.test/"),
        ])
        .await;
        let session = PageSession::attach(&url, Engine::Cdp, Some(r"other"))
            .await
            .unwrap();
        assert_eq!(expect_cdp(session).target_id, "b");
    }

    #[tokio::test]
    async fn attach_cdp_url_regex_no_match_errors() {
        let url = spawn_cdp_mock(vec![page("a", "https://example.com/")]).await;
        // `PageSession` is not `Debug`, so `unwrap_err` is unavailable here.
        let Err(err) = PageSession::attach(&url, Engine::Cdp, Some("nomatch")).await else {
            panic!("expected error");
        };
        assert!(err.to_string().contains("no CDP page target matched"));
    }

    #[tokio::test]
    async fn evaluate_round_trip_cdp() {
        let url = spawn_cdp_mock(vec![page("a", "https://example.com/")]).await;
        let session = PageSession::attach(&url, Engine::Cdp, None).await.unwrap();
        let value = session.evaluate("1+1", false).await.unwrap();
        assert_eq!(value, json!("ok"));
        session.close().await;
    }

    #[tokio::test]
    async fn screenshot_round_trip_cdp() {
        let url = spawn_cdp_mock(vec![page("a", "https://example.com/")]).await;
        let session = PageSession::attach(&url, Engine::Cdp, None).await.unwrap();
        let b64 = session.screenshot(false).await.unwrap();
        assert_eq!(b64, "PNGDATA");
        session.close().await;
    }
}