use std::cell::{Cell, RefCell};
use std::rc::Rc;
use std::sync::mpsc;
use std::time::{Duration, Instant};
use anyhow::{Result, anyhow};
use dpi::PhysicalSize;
use euclid::{Box2D, Point2D};
use image::RgbaImage;
use servo::{
JSValue, LoadStatus, NavigationRequest, Preferences, RenderingContext, ServoBuilder, SoftwareRenderingContext,
WebView, WebViewBuilder, WebViewDelegate,
};
use servo_fetch::layout;
/// Upper bound on how long `eval_js` spins the event loop waiting for a script result.
const JS_EVAL_TIMEOUT: Duration = Duration::from_secs(10);
/// Extra time `spin_until` keeps spinning the event loop after the load flag is set.
const SETTLE_DURATION: Duration = Duration::from_millis(500);
/// Sleep between successive event-loop spins in the polling loops.
const SPIN_INTERVAL: Duration = Duration::from_millis(10);
/// Script evaluated by `handle_request`; its result is stored in `ServoPage::layout_json`.
const LAYOUT_JS: &str = include_str!("js/layout.js");
/// Read limit (50 MiB) applied to the body of an intercepted PDF download.
const MAX_PDF_BYTES: u64 = 50 * 1024 * 1024;
/// Per-request state shared between the Servo thread loop and the webview callbacks.
struct Delegate {
// Set to `true` once the webview reports `LoadStatus::Complete`.
loaded: Rc<Cell<bool>>,
// Filled by `load_web_resource` with the downloaded bytes when the main-frame resource is a PDF.
pdf_data: Rc<RefCell<Option<Vec<u8>>>>,
}
/// Webview callbacks used while fetching a single page.
impl WebViewDelegate for Delegate {
    /// Flags the page as loaded once the status reaches `LoadStatus::Complete`.
    fn notify_load_status_changed(&self, _webview: WebView, status: LoadStatus) {
        if matches!(status, LoadStatus::Complete) {
            self.loaded.set(true);
        }
    }

    /// Paints every frame the engine announces as ready.
    fn notify_new_frame_ready(&self, webview: WebView) {
        webview.paint();
    }

    /// Allows navigation only to `http`/`https` URLs whose host is present and
    /// not classified as private by `crate::net::is_private_host`; everything
    /// else is denied with a warning on stderr.
    fn request_navigation(&self, _webview: WebView, navigation_request: NavigationRequest) {
        let web_scheme = matches!(navigation_request.url.scheme(), "http" | "https");
        // `&&` short-circuits, so the host check only runs for web schemes.
        let permitted = web_scheme
            && navigation_request
                .url
                .host_str()
                .is_some_and(|host| !crate::net::is_private_host(host));

        if permitted {
            navigation_request.allow();
        } else {
            eprintln!("warning: blocked navigation to {}", navigation_request.url);
            navigation_request.deny();
        }
    }

    /// Probes main-frame loads for PDFs.
    ///
    /// When a `HEAD` request reports an `application/pdf` content type, the
    /// document is downloaded (capped at `MAX_PDF_BYTES`), stored in
    /// `pdf_data`, and the load is answered with an empty HTML page instead.
    /// Any other outcome (sub-frame resource, private host, network failure,
    /// non-PDF content type, oversized body) returns without intercepting.
    fn load_web_resource(&self, _webview: WebView, load: servo::WebResourceLoad) {
        let request = load.request();
        if !request.is_for_main_frame {
            return;
        }
        let target = request.url.clone();

        // URLs without a host fall through to the probe below.
        if target.host_str().is_some_and(|host| crate::net::is_private_host(host)) {
            return;
        }

        // Every step that can fail maps to `None`, which means "do not intercept".
        let download_pdf = || -> Option<Vec<u8>> {
            let config = ureq::config::Config::builder()
                .max_redirects(0)
                .timeout_global(Some(Duration::from_secs(15)))
                .build();
            let agent = ureq::Agent::new_with_config(config);

            let head = agent.head(target.as_str()).call().ok()?;
            let content_type = head
                .headers()
                .get("content-type")
                .and_then(|value| value.to_str().ok())?;
            if !content_type.to_ascii_lowercase().starts_with("application/pdf") {
                return None;
            }

            let response = agent.get(target.as_str()).call().ok()?;
            response
                .into_body()
                .with_config()
                .limit(MAX_PDF_BYTES)
                .read_to_vec()
                .ok()
        };
        let Some(pdf) = download_pdf() else {
            return;
        };
        *self.pdf_data.borrow_mut() = Some(pdf);

        // Hand the engine a blank document in place of the PDF bytes.
        let mut intercepted = load.intercept(servo::WebResourceResponse::new(target));
        intercepted.send_body_data(b"<html><body></body></html>".to_vec());
        intercepted.finish();
    }
}
/// Everything extracted from one page load by `handle_request`.
pub(crate) struct ServoPage {
// Result of evaluating `document.documentElement.outerHTML`.
pub html: String,
// Result of evaluating `document.body.innerText`; `None` if that evaluation failed.
pub inner_text: Option<String>,
// Result of evaluating `LAYOUT_JS`; `None` if that evaluation failed.
pub layout_json: Option<String>,
// Image read back from the rendering context when a screenshot was requested.
pub screenshot: Option<RgbaImage>,
// Result of the caller-supplied script, when one was given.
pub js_result: Option<String>,
// Raw PDF bytes captured by the delegate when the main-frame resource was a PDF.
pub pdf_data: Option<Vec<u8>>,
}
/// One unit of work sent from `fetch_page` to the Servo thread.
struct FetchRequest {
// URL to load; parsed on the Servo thread.
url: String,
// Page-load timeout in seconds.
timeout_secs: u64,
// Whether to read a screenshot back from the rendering context.
take_screenshot: bool,
// Optional script evaluated after the page has loaded.
custom_js: Option<String>,
// Channel on which the Servo thread sends the outcome back to the caller.
reply: mpsc::Sender<Result<ServoPage>>,
}
/// Loads `url` on the shared Servo thread and returns what was extracted.
///
/// The Servo thread is spawned on first use and reused for later calls.
/// Stderr is suppressed (where `StderrGuard` supports it) for the duration of
/// the call.
///
/// # Errors
///
/// Fails when the request cannot be handed to the Servo thread, when that
/// thread goes away before replying, or with whatever error the thread
/// reports for this page.
///
/// # Panics
///
/// Panics if the Servo thread cannot be spawned.
pub(crate) fn fetch_page(
    url: &str,
    timeout_secs: u64,
    take_screenshot: bool,
    custom_js: Option<&str>,
) -> Result<ServoPage> {
    use std::sync::OnceLock;

    static SENDER: OnceLock<mpsc::Sender<FetchRequest>> = OnceLock::new();

    let stderr_silencer = stderr_guard::StderrGuard::suppress();

    let engine = SENDER.get_or_init(|| {
        let (request_tx, request_rx) = mpsc::channel::<FetchRequest>();
        std::thread::Builder::new()
            .name("servo-engine".into())
            .spawn(move || servo_thread(request_rx))
            .expect("failed to spawn servo thread");
        request_tx
    });

    let (reply, answer) = mpsc::channel();
    let request = FetchRequest {
        url: url.to_owned(),
        timeout_secs,
        take_screenshot,
        custom_js: custom_js.map(str::to_owned),
        reply,
    };

    if engine.send(request).is_err() {
        return Err(anyhow!(
            "Servo engine is not running (it may have crashed on a previous request)"
        ));
    }
    let Ok(outcome) = answer.recv() else {
        return Err(anyhow!(
            "Servo engine crashed while processing this page. Try a different URL."
        ));
    };

    // Restore stderr before handing the outcome back to the caller.
    drop(stderr_silencer);
    outcome
}
/// Body of the dedicated Servo thread: builds the engine once, then serves
/// fetch requests from `rx` until every sender has been dropped.
///
/// If the engine cannot be built, the first request received is answered with
/// the initialization error and the thread exits.
#[expect(clippy::needless_pass_by_value)]
fn servo_thread(rx: mpsc::Receiver<FetchRequest>) {
    let (rc_ctx, servo) = match build_servo() {
        Ok(engine) => engine,
        Err(init_err) => {
            if let Ok(first) = rx.recv() {
                let _ = first
                    .reply
                    .send(Err(init_err.context("Servo initialization failed")));
            }
            return;
        }
    };

    for req in &rx {
        let target = match url::Url::parse(&req.url) {
            Ok(target) => target,
            Err(e) => {
                let _ = req.reply.send(Err(anyhow!("bad url: {e}")));
                continue;
            }
        };

        // Fresh per-request state shared with the delegate callbacks.
        let loaded = Rc::new(Cell::new(false));
        let pdf_data: Rc<RefCell<Option<Vec<u8>>>> = Rc::new(RefCell::new(None));
        let delegate = Rc::new(Delegate {
            loaded: Rc::clone(&loaded),
            pdf_data: Rc::clone(&pdf_data),
        });

        let rendering: Rc<dyn RenderingContext> = rc_ctx.clone();
        let webview = WebViewBuilder::new(&servo, rendering)
            .url(target)
            .delegate(delegate)
            .build();

        let outcome = handle_request(&servo, &webview, &rc_ctx, &loaded, &pdf_data, &req);
        // Release the webview before replying to the caller.
        drop(webview);
        let _ = req.reply.send(outcome);
    }
}
/// Waits for the page to load and settle, then collects the requested artefacts.
///
/// # Errors
///
/// Fails when the page does not finish loading within `req.timeout_secs`,
/// when the outer HTML cannot be evaluated, or when the caller-supplied
/// script fails. Failures of the inner-text and layout scripts are tolerated
/// and reported as `None`.
fn handle_request(
    servo: &servo::Servo,
    webview: &WebView,
    rc_ctx: &Rc<SoftwareRenderingContext>,
    loaded: &Cell<bool>,
    pdf_data: &RefCell<Option<Vec<u8>>>,
    req: &FetchRequest,
) -> Result<ServoPage> {
    let deadline = Instant::now() + Duration::from_secs(req.timeout_secs);
    spin_until(servo, loaded, deadline, req.timeout_secs)?;

    let run = |script: &str| eval_js(servo, webview, script);

    let html = run("document.documentElement.outerHTML")?;
    let inner_text = run("document.body.innerText").ok();
    let layout_json = run(LAYOUT_JS).ok();

    #[expect(clippy::cast_possible_wrap)]
    let screenshot = req
        .take_screenshot
        .then(|| {
            let top_left = Point2D::new(0, 0);
            let bottom_right =
                Point2D::new(layout::VIEWPORT_WIDTH as i32, layout::VIEWPORT_HEIGHT as i32);
            rc_ctx.read_to_image(Box2D::new(top_left, bottom_right))
        })
        .flatten();

    let js_result = match req.custom_js.as_deref() {
        Some(expr) => Some(run(expr)?),
        None => None,
    };

    Ok(ServoPage {
        html,
        inner_text,
        layout_json,
        screenshot,
        js_result,
        // Move any captured PDF out of the shared slot, leaving `None` behind.
        pdf_data: pdf_data.take(),
    })
}
/// Creates the software rendering context (sized to the layout viewport) and
/// a Servo instance with WebGPU, WebXR, service workers and Bluetooth disabled.
///
/// # Errors
///
/// Fails when the rendering context cannot be created or made current.
fn build_servo() -> Result<(Rc<SoftwareRenderingContext>, servo::Servo)> {
    let viewport = PhysicalSize::new(layout::VIEWPORT_WIDTH, layout::VIEWPORT_HEIGHT);

    let ctx = match SoftwareRenderingContext::new(viewport) {
        Ok(ctx) => ctx,
        Err(e) => return Err(anyhow!("failed to create rendering context: {e:?}")),
    };
    if let Err(e) = ctx.make_current() {
        return Err(anyhow!("failed to make context current: {e:?}"));
    }

    let servo = ServoBuilder::default()
        .preferences(Preferences {
            dom_webgpu_enabled: false,
            dom_webxr_enabled: false,
            dom_serviceworker_enabled: false,
            dom_bluetooth_enabled: false,
            ..Preferences::default()
        })
        .build();

    Ok((Rc::new(ctx), servo))
}
/// Spins the Servo event loop until `condition` becomes true, then keeps
/// spinning for `SETTLE_DURATION` more.
///
/// `timeout_secs` is only used in the error message; the actual cut-off is
/// `deadline`. The settle phase is not bounded by `deadline`.
///
/// # Errors
///
/// Fails when `deadline` passes before `condition` is set.
fn spin_until(servo: &servo::Servo, condition: &Cell<bool>, deadline: Instant, timeout_secs: u64) -> Result<()> {
    let pump = || {
        servo.spin_event_loop();
        std::thread::sleep(SPIN_INTERVAL);
    };

    loop {
        if condition.get() {
            break;
        }
        if Instant::now() > deadline {
            return Err(anyhow!(
                "page load timed out after {timeout_secs}s (try increasing --timeout)"
            ));
        }
        pump();
    }

    let settle_end = Instant::now() + SETTLE_DURATION;
    while Instant::now() < settle_end {
        pump();
    }
    Ok(())
}
fn eval_js(servo: &servo::Servo, webview: &WebView, script: &str) -> Result<String> {
let result: Rc<RefCell<Option<Result<String>>>> = Rc::new(RefCell::new(None));
let cb_result = result.clone();
webview.evaluate_javascript(script, move |js_result| {
let val = match js_result {
Ok(JSValue::String(s)) => Ok(s),
Ok(JSValue::Undefined | JSValue::Null) => Ok(String::new()),
Ok(JSValue::Boolean(b)) => Ok(b.to_string()),
Ok(JSValue::Number(n)) => Ok(n.to_string()),
Ok(other) => jsvalue_to_json(&other).and_then(|v| serde_json::to_string(&v).map_err(|e| anyhow!("{e}"))),
Err(e) => Err(anyhow!("JS eval error: {e:?}")),
};
*cb_result.borrow_mut() = Some(val);
});
let deadline = Instant::now() + JS_EVAL_TIMEOUT;
loop {
servo.spin_event_loop();
if let Some(val) = result.borrow_mut().take() {
return val;
}
if Instant::now() > deadline {
return Err(anyhow!("timeout waiting for JS evaluation"));
}
std::thread::sleep(SPIN_INTERVAL);
}
}
fn jsvalue_to_json(val: &JSValue) -> Result<serde_json::Value> {
const MAX_DEPTH: u8 = 64;
fn convert(val: &JSValue, depth: u8) -> Result<serde_json::Value> {
if depth >= MAX_DEPTH {
return Err(anyhow!("JS value nested too deeply (>{MAX_DEPTH} levels)"));
}
Ok(match val {
JSValue::Undefined | JSValue::Null => serde_json::Value::Null,
JSValue::Boolean(b) => serde_json::Value::Bool(*b),
JSValue::Number(n) => serde_json::json!(n),
JSValue::String(s)
| JSValue::Element(s)
| JSValue::ShadowRoot(s)
| JSValue::Frame(s)
| JSValue::Window(s) => serde_json::Value::String(s.clone()),
JSValue::Array(arr) => {
let items: Result<Vec<_>> = arr.iter().map(|v| convert(v, depth + 1)).collect();
serde_json::Value::Array(items?)
}
JSValue::Object(map) => {
let entries: Result<serde_json::Map<_, _>> = map
.iter()
.map(|(k, v)| Ok((k.clone(), convert(v, depth + 1)?)))
.collect();
serde_json::Value::Object(entries?)
}
})
}
convert(val, 0)
}
#[cfg(unix)]
mod stderr_guard {
    //! Temporarily redirects the process-wide stderr (fd 2) to `/dev/null`.

    use std::os::unix::io::{AsRawFd, FromRawFd, OwnedFd};

    /// RAII guard: while alive, writes to fd 2 are discarded; on drop the
    /// original stderr is restored.
    ///
    /// NOTE(review): the redirection affects the whole process. Overlapping
    /// guards on different threads could restore the descriptors in the wrong
    /// order — confirm callers never overlap.
    pub(crate) struct StderrGuard {
        /// Duplicate of the original fd 2, or `None` when suppression could
        /// not be set up (the guard is then a no-op).
        saved_fd: Option<OwnedFd>,
    }

    impl StderrGuard {
        /// Redirects fd 2 to `/dev/null` and returns a guard that undoes it.
        ///
        /// Best effort: if any step fails, stderr is left untouched and the
        /// returned guard does nothing on drop.
        // Raw descriptor manipulation has no safe std equivalent.
        #[allow(unsafe_code)]
        pub(crate) fn suppress() -> Self {
            // SAFETY: `dup` has no memory-safety preconditions; failure is
            // reported through a negative return value, checked below.
            let saved = unsafe { libc::dup(2) };
            if saved < 0 {
                return Self { saved_fd: None };
            }
            // SAFETY: `saved` is a freshly duplicated, valid descriptor that
            // nothing else owns, so `OwnedFd` may take over closing it.
            let saved_fd = unsafe { OwnedFd::from_raw_fd(saved) };
            // Keep the saved descriptor from being inherited across `exec`.
            // SAFETY: `saved_fd` is a valid open descriptor; `F_SETFD` only
            // changes its descriptor flags.
            unsafe { libc::fcntl(saved_fd.as_raw_fd(), libc::F_SETFD, libc::FD_CLOEXEC) };

            // Open for WRITING: `File::open` would yield a read-only
            // descriptor, making every write to stderr fail with EBADF
            // instead of being silently discarded.
            let Ok(devnull) = std::fs::OpenOptions::new().write(true).open("/dev/null") else {
                return Self { saved_fd: None };
            };

            // SAFETY: both descriptors are valid and open; `dup2` atomically
            // repoints fd 2 at `/dev/null`.
            let redirected = unsafe { libc::dup2(devnull.as_raw_fd(), 2) };
            // `devnull` is closed when it goes out of scope; fd 2 keeps its
            // own reference to `/dev/null`.
            if redirected < 0 {
                // Nothing was redirected, so there is nothing to restore.
                return Self { saved_fd: None };
            }

            Self {
                saved_fd: Some(saved_fd),
            }
        }
    }

    impl Drop for StderrGuard {
        /// Points fd 2 back at the saved original stderr, if one was saved.
        // Raw descriptor manipulation has no safe std equivalent.
        #[allow(unsafe_code)]
        fn drop(&mut self) {
            if let Some(ref fd) = self.saved_fd {
                // SAFETY: `fd` is a valid descriptor owned by this guard for
                // its whole lifetime; `dup2` only repoints fd 2 at it.
                unsafe {
                    libc::dup2(fd.as_raw_fd(), 2);
                }
            }
        }
    }
}
#[cfg(not(unix))]
mod stderr_guard {
    //! Stand-in for platforms without Unix file descriptors.

    /// Guard type with no state; creating and dropping it does nothing.
    pub(crate) struct StderrGuard;

    impl StderrGuard {
        /// Returns the no-op guard; stderr is left untouched.
        pub(crate) fn suppress() -> Self {
            StderrGuard
        }
    }
}