use std::sync::Arc;
use base64::Engine as _;
use base64::engine::general_purpose::STANDARD as BASE64;
use rmcp::ErrorData;
use rmcp::model::{CallToolResult, Content};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use serde_json::json;
use tokio::sync::Mutex;
use zendriver::{Frame, Tab, ZendriverError};
use crate::errors::{McpServerError, map_error};
use crate::selectors::Selector;
use crate::snapshot::html_trim;
use crate::state::SessionState;
use crate::tools::common::current_tab;
use crate::tools::find::resolve;
// Input accepted by `html`.
//
// `deny_unknown_fields` makes deserialization fail on any key that is not
// listed below. `html` additionally rejects inputs that set both `selector`
// and `frame_id`.
//
// NOTE(review): plain `//` comments are used on purpose; `///` doc comments
// may be picked up by the `JsonSchema` derive and change the generated
// schema — confirm before converting them.
#[derive(Debug, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct HtmlInput {
// Element to read HTML from. When absent, `html` returns a whole frame's
// content instead.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub selector: Option<Selector>,
// With a selector: `true` requests the element's outer HTML, `false`
// (the default) its inner HTML. Not consulted when `selector` is absent.
#[serde(default)]
pub outer: bool,
// When `true`, `html` passes the markup through `html_trim::trim` before
// returning it. Defaults to `true` via `default_true`.
#[serde(default = "default_true")]
pub trim: bool,
// Id of the frame whose content should be returned. When both this and
// `selector` are absent, `html` reads the tab's main frame.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub frame_id: Option<String>,
}
/// Serde default provider that yields `true`; referenced by
/// `#[serde(default = "default_true")]` on `HtmlInput::trim`.
const fn default_true() -> bool {
true
}
pub async fn html(state: Arc<Mutex<SessionState>>, input: HtmlInput) -> Result<String, ErrorData> {
if input.selector.is_some() && input.frame_id.is_some() {
return Err(ErrorData::invalid_params(
"`selector` and `frame_id` are mutually exclusive — use the selector's own `frame_id` field to scope element lookup to a sub-frame."
.to_string(),
None,
));
}
let s = state.lock().await;
let tab = current_tab(&s).await?;
let raw = if let Some(sel) = input.selector.as_ref() {
let el = resolve(&tab, sel).await?;
if input.outer {
el.outer_html()
.await
.map_err(|e| map_error(McpServerError::from(e)))?
} else {
el.inner_html()
.await
.map_err(|e| map_error(McpServerError::from(e)))?
}
} else if let Some(fid) = input.frame_id.as_deref() {
let frame = lookup_frame(&tab, fid).await?;
frame
.content()
.await
.map_err(|e| map_error(McpServerError::from(e)))?
} else {
let frame = tab
.main_frame()
.await
.map_err(|e| map_error(McpServerError::from(e)))?;
frame
.content()
.await
.map_err(|e| map_error(McpServerError::from(e)))?
};
Ok(if input.trim {
html_trim::trim(&raw)
} else {
raw
})
}
async fn lookup_frame(tab: &Tab, frame_id: &str) -> Result<Frame, ErrorData> {
let frames = tab
.frames()
.await
.map_err(|e| map_error(McpServerError::from(e)))?;
frames
.into_iter()
.find(|f| f.id() == frame_id)
.ok_or_else(|| {
map_error(McpServerError::from(ZendriverError::FrameNotFound(
frame_id.to_string(),
)))
})
}
// Image encodings that can be requested for a screenshot.
//
// `rename_all = "snake_case"` means the variants are (de)serialized as
// `png`, `jpeg` and `webp`.
//
// NOTE(review): plain `//` comments are used on purpose; `///` doc comments
// on the enum or its variants may be picked up by the `JsonSchema` derive and
// change the generated schema — confirm before converting them.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, JsonSchema, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ImgFormat {
Png,
Jpeg,
Webp,
}
/// Serde default provider that yields `ImgFormat::Png`; referenced by
/// `#[serde(default = "default_format")]` on `ScreenshotInput::format`.
const fn default_format() -> ImgFormat {
ImgFormat::Png
}
impl ImgFormat {
/// MIME type string for this format (`image/png`, `image/jpeg` or
/// `image/webp`); `screenshot` attaches it to the returned image content.
fn mime(self) -> &'static str {
match self {
Self::Png => "image/png",
Self::Jpeg => "image/jpeg",
Self::Webp => "image/webp",
}
}
/// Short lowercase name of this format (`png`, `jpeg` or `webp`);
/// `screenshot` reports it under the `format` key of its structured output.
fn as_str(self) -> &'static str {
match self {
Self::Png => "png",
Self::Jpeg => "jpeg",
Self::Webp => "webp",
}
}
}
// Input accepted by `screenshot`.
//
// `deny_unknown_fields` makes deserialization fail on any key that is not
// listed below.
//
// NOTE(review): plain `//` comments are used on purpose; `///` doc comments
// may be picked up by the `JsonSchema` derive and change the generated
// schema — confirm before converting them.
#[derive(Debug, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct ScreenshotInput {
// Encoding of the captured image. Defaults to `ImgFormat::Png` via
// `default_format`.
#[serde(default = "default_format")]
pub format: ImgFormat,
// Forwarded unchanged to the screenshot builder's `full_page` setting.
// Defaults to `false`.
#[serde(default)]
pub full_page: bool,
// When set, the element is resolved and its bounding box is used as the
// clip region of the capture.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub selector: Option<Selector>,
// Forwarded unchanged to the screenshot builder's `omit_background`
// setting. Defaults to `false`.
#[serde(default)]
pub omit_background: bool,
// Passed to the screenshot builder's `quality` setting only when present.
// NOTE(review): no range or format check is applied here; presumably only
// meaningful for lossy formats — confirm against the driver.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub quality: Option<u8>,
// When set, the captured bytes are also written to this path and the path
// is echoed back as `saved_path` in the structured output.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub save_path: Option<String>,
}
pub async fn screenshot(
state: Arc<Mutex<SessionState>>,
input: ScreenshotInput,
) -> Result<CallToolResult, ErrorData> {
let s = state.lock().await;
let tab = current_tab(&s).await?;
let clip_bbox = if let Some(sel) = input.selector.as_ref() {
let el = resolve(&tab, sel).await?;
let bbox = el
.bounding_box()
.await
.map_err(|e| map_error(McpServerError::from(e)))?
.ok_or_else(|| {
ErrorData::invalid_request(
"Selected element has no bounding box (likely `display: none` or detached). Cannot clip the screenshot."
.to_string(),
Some(json!({ "suggested_next": "browser_element_state" })),
)
})?;
Some(bbox)
} else {
None
};
let mut builder = tab.screenshot_builder();
builder = match input.format {
ImgFormat::Png => builder.png(),
ImgFormat::Jpeg => builder.jpeg(),
ImgFormat::Webp => builder.webp(),
};
builder = builder.full_page(input.full_page);
builder = builder.omit_background(input.omit_background);
if let Some(q) = input.quality {
builder = builder.quality(q);
}
if let Some(bbox) = clip_bbox {
builder = builder.clip(bbox);
}
let bytes = builder
.bytes()
.await
.map_err(|e| map_error(McpServerError::from(e)))?;
let byte_len = bytes.len();
if let Some(p) = input.save_path.as_deref() {
tokio::fs::write(p, &bytes).await.map_err(|e| {
ErrorData::internal_error(format!("Failed to write screenshot to `{p}`: {e}"), None)
})?;
}
let encoded = BASE64.encode(&bytes);
let image = Content::image(encoded, input.format.mime());
let mut meta = serde_json::Map::new();
meta.insert("format".into(), json!(input.format.as_str()));
meta.insert("byte_len".into(), json!(byte_len));
if let Some(p) = input.save_path.as_deref() {
meta.insert("saved_path".into(), json!(p));
}
let mut result = CallToolResult::success(vec![image]);
result.structured_content = Some(serde_json::Value::Object(meta));
Ok(result)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a brand-new shared session state; the tests below rely on it
    /// not having an open browser.
    fn fresh() -> Arc<Mutex<SessionState>> {
        let session = SessionState::new();
        Arc::new(Mutex::new(session))
    }

    /// Builds a selector that matches by CSS only, with every other criterion
    /// unset, `visible_only` enabled and a 5000 ms timeout.
    fn css_sel(s: &str) -> Selector {
        let css = Some(s.into());
        Selector {
            css,
            xpath: None,
            text: None,
            text_exact: None,
            text_regex: None,
            role: None,
            role_name: None,
            tag: None,
            attrs: vec![],
            nth: None,
            visible_only: true,
            timeout_ms: 5000,
            frame_id: None,
        }
    }

    /// Asserts that `err` points the caller at `browser_open`, both in its
    /// message and in its `suggested_next` data field.
    #[track_caller]
    fn assert_suggests_browser_open(err: &ErrorData) {
        assert!(err.message.contains("browser_open"), "msg: {}", err.message);
        let data = err.data.as_ref().expect("data populated");
        assert_eq!(data["suggested_next"], "browser_open");
    }

    #[tokio::test]
    async fn html_with_no_browser_suggests_browser_open() {
        let input = HtmlInput {
            selector: None,
            outer: false,
            trim: true,
            frame_id: None,
        };
        let err = html(fresh(), input)
            .await
            .expect_err("must error without an open browser");
        assert_suggests_browser_open(&err);
    }

    #[tokio::test]
    async fn html_rejects_selector_and_frame_id_combo_with_invalid_params() {
        let input = HtmlInput {
            selector: Some(css_sel("h1")),
            outer: false,
            trim: true,
            frame_id: Some("frame-X".into()),
        };
        let err = html(fresh(), input)
            .await
            .expect_err("selector+frame_id must error");
        assert!(
            err.message.contains("mutually exclusive"),
            "msg: {}",
            err.message
        );
        assert_eq!(err.code.0, -32602, "expected invalid_params code");
    }

    #[tokio::test]
    async fn screenshot_with_no_browser_suggests_browser_open() {
        let input = ScreenshotInput {
            format: ImgFormat::Png,
            full_page: false,
            selector: None,
            omit_background: false,
            quality: None,
            save_path: None,
        };
        let err = screenshot(fresh(), input)
            .await
            .expect_err("must error without an open browser");
        assert_suggests_browser_open(&err);
    }
}