use crate::{Engine, EngineConfig, Error, Result, ScriptResult, TextSnapshot};
use reqwest::blocking::Client;
use scraper::{Html, Selector};
use std::collections::VecDeque;
use std::sync::{Arc, Mutex, OnceLock};
use std::time::{Duration, Instant};
use futures::StreamExt;
static TITLE_SELECTOR: OnceLock<Selector> = OnceLock::new();
static BODY_SELECTOR: OnceLock<Selector> = OnceLock::new();
static STYLE_SELECTOR: OnceLock<Selector> = OnceLock::new();
static LINK_STYLESHEET_SELECTOR: OnceLock<Selector> = OnceLock::new();
fn title_selector() -> &'static Selector {
TITLE_SELECTOR.get_or_init(|| Selector::parse("title").unwrap())
}
fn body_selector() -> &'static Selector {
BODY_SELECTOR.get_or_init(|| Selector::parse("body").unwrap())
}
fn style_selector() -> &'static Selector {
STYLE_SELECTOR.get_or_init(|| Selector::parse("style").unwrap())
}
fn link_stylesheet_selector() -> &'static Selector {
LINK_STYLESHEET_SELECTOR.get_or_init(|| Selector::parse("link[rel=\"stylesheet\"]").unwrap())
}
type OnLoadHandler = Arc<dyn Fn(&TextSnapshot) + Send + Sync>;
type OnConsoleHandler = Arc<dyn Fn(&crate::ConsoleMessage) + Send + Sync>;
type OnRequestHandler = Arc<dyn Fn(&crate::RequestInfo) -> crate::RequestAction + Send + Sync>;
struct CssCache {
map: std::collections::HashMap<String, (String, Instant)>,
order: VecDeque<String>,
capacity: usize,
ttl: Duration,
}
impl CssCache {
fn new(capacity: usize, ttl: Duration) -> Self {
Self {
map: std::collections::HashMap::new(),
order: VecDeque::new(),
capacity,
ttl,
}
}
fn get(&mut self, key: &str) -> Option<String> {
if let Some((val, ts)) = self.map.get(key) {
if ts.elapsed() <= self.ttl {
return Some(val.clone());
}
self.map.remove(key);
if let Some(pos) = self.order.iter().position(|k| k == key) {
self.order.remove(pos);
}
}
None
}
fn insert(&mut self, key: String, value: String) {
if self.map.contains_key(&key) {
if let Some(pos) = self.order.iter().position(|k| k == &key) {
self.order.remove(pos);
}
self.order.push_back(key.clone());
self.map.insert(key, (value, Instant::now()));
return;
}
if self.map.len() >= self.capacity {
if let Some(old) = self.order.pop_front() {
self.map.remove(&old);
}
}
self.order.push_back(key.clone());
self.map.insert(key, (value, Instant::now()));
}
}
struct ScriptJob {
code: String,
loop_limit: u64,
recursion_limit: usize,
on_console: Option<OnConsoleHandler>,
resp: std::sync::mpsc::Sender<ScriptResult>,
}
#[allow(clippy::type_complexity)]
static RFOX_CONSOLE_REG: OnceLock<
std::sync::Mutex<
std::collections::HashMap<usize, Arc<dyn Fn(&crate::ConsoleMessage) + Send + Sync>>,
>,
> = OnceLock::new();
fn spawn_script_worker() -> (
std::sync::mpsc::Sender<ScriptJob>,
std::thread::JoinHandle<()>,
) {
let (tx, rx) = std::sync::mpsc::channel::<ScriptJob>();
let handle = std::thread::spawn(move || {
let mut ctx: boa_engine::Context = boa_engine::Context::default();
fn rfox_console_native(
_this: &boa_engine::JsValue,
args: &[boa_engine::JsValue],
ctx: &mut boa_engine::Context,
) -> boa_engine::JsResult<boa_engine::JsValue> {
let ptr = ctx as *const _ as usize;
let map = RFOX_CONSOLE_REG.get_or_init(|| {
std::sync::Mutex::new(std::collections::HashMap::<
usize,
std::sync::Arc<dyn Fn(&crate::ConsoleMessage) + Send + Sync>,
>::new())
});
if let Ok(lock) = map.lock() {
if let Some(cb) = lock.get(&ptr) {
let text = args
.first()
.map(|a| format!("{}", a.display()))
.unwrap_or_default();
let stack = args
.get(1)
.map(|a| format!("{}", a.display()))
.filter(|s| !s.is_empty());
let (source, line_no, col_no) = parse_stack_info(stack.as_deref());
cb(&crate::ConsoleMessage {
level: "log".to_string(),
text,
source,
line: line_no,
column: col_no,
stack,
});
}
}
Ok(boa_engine::JsValue::undefined())
}
let nf = boa_engine::native_function::NativeFunction::from_fn_ptr(
rfox_console_native as boa_engine::native_function::NativeFunctionPointer,
);
let _ = ctx.register_global_builtin_callable(
boa_engine::js_string!("__rfox_console_log"),
0usize,
nf.clone(),
);
let _ = ctx.register_global_builtin_callable(
boa_engine::js_string!("__rfox_console_error"),
0usize,
nf,
);
while let Ok(job) = rx.recv() {
if job.loop_limit > 0 {
ctx.runtime_limits_mut()
.set_loop_iteration_limit(job.loop_limit);
}
if job.recursion_limit < usize::MAX {
ctx.runtime_limits_mut()
.set_recursion_limit(job.recursion_limit);
}
if let Some(cb) = &job.on_console {
let ptr = &ctx as *const _ as usize;
let map = RFOX_CONSOLE_REG.get_or_init(|| {
std::sync::Mutex::new(std::collections::HashMap::<
usize,
std::sync::Arc<dyn Fn(&crate::ConsoleMessage) + Send + Sync>,
>::new())
});
if let Ok(mut lock) = map.lock() {
lock.insert(ptr, cb.clone());
}
}
let script_res = match ctx.eval(boa_engine::Source::from_bytes(job.code.as_bytes())) {
Ok(val) => {
if let Ok(cmsg) = ctx.eval(boa_engine::Source::from_bytes(
"__rfox_console.join('\n')".as_bytes(),
)) {
let console_text = format!("{}", cmsg.display());
if !console_text.is_empty() {
for line in console_text.split('\n') {
if let Some(cb) = &job.on_console {
let cm = crate::ConsoleMessage {
level: "log".to_string(),
text: line.to_string(),
source: None,
line: None,
column: None,
stack: None,
};
cb(&cm);
}
}
}
}
ScriptResult {
value: format!("{}", val.display()),
is_error: false,
}
}
Err(e) => {
if let Ok(cmsg) = ctx.eval(boa_engine::Source::from_bytes(
"__rfox_console.join('\n')".as_bytes(),
)) {
let console_text = format!("{}", cmsg.display());
if !console_text.is_empty() {
for line in console_text.split('\n') {
if let Some(cb) = &job.on_console {
let cm = crate::ConsoleMessage {
level: "error".to_string(),
text: line.to_string(),
source: None,
line: None,
column: None,
stack: None,
};
cb(&cm);
}
}
}
}
let err_msg = format!("Script thrown: {}", e);
ScriptResult {
value: err_msg,
is_error: true,
}
}
};
if job.on_console.is_some() {
let ptr = &ctx as *const _ as usize;
let map = RFOX_CONSOLE_REG.get_or_init(|| {
std::sync::Mutex::new(std::collections::HashMap::<
usize,
std::sync::Arc<dyn Fn(&crate::ConsoleMessage) + Send + Sync>,
>::new())
});
if let Ok(mut lock) = map.lock() {
lock.remove(&ptr);
}
}
let _ = job.resp.send(script_res);
}
});
(tx, handle)
}
fn spawn_process_worker() -> (
std::sync::mpsc::Sender<ScriptJob>,
std::thread::JoinHandle<()>,
std::sync::Arc<std::sync::Mutex<Option<std::process::Child>>>,
) {
use std::io::{BufRead, BufReader, Write};
use std::process::{Command, Stdio};
let (tx, rx) = std::sync::mpsc::channel::<ScriptJob>();
let exe = std::env::var_os("CARGO_BIN_EXE_rfheadless")
.map(std::path::PathBuf::from)
.or_else(|| {
std::env::current_exe().ok().and_then(|p| {
if let Some(parent) = p.parent().and_then(|p| p.parent()) {
let candidate = parent.join("rfheadless");
if candidate.exists() {
return Some(candidate);
}
}
None
})
})
.or_else(|| std::env::current_exe().ok())
.unwrap_or_else(|| std::path::PathBuf::from("./rfheadless"));
let mut child = Command::new(exe)
.arg("--worker")
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.expect("failed to spawn worker process");
let stdin_handle = child.stdin.take().expect("worker stdin");
let stdout_handle = child.stdout.take().expect("worker stdout");
let child_ref = std::sync::Arc::new(std::sync::Mutex::new(Some(child)));
let child_ref_for_thread = child_ref.clone();
let handle = std::thread::spawn(move || {
let mut stdin = stdin_handle;
let stdout = stdout_handle;
let mut reader = BufReader::new(stdout);
let mut next_id: u64 = 1;
while let Ok(job) = rx.recv() {
let id = next_id;
next_id += 1;
let job_json = serde_json::json!({ "id": id, "code": job.code, "loop_limit": job.loop_limit, "recursion_limit": job.recursion_limit });
if let Err(e) = writeln!(stdin, "{}", job_json) {
eprintln!("failed to write to worker stdin: {}", e);
let _ = job.resp.send(ScriptResult {
value: format!("Worker write failed: {}", e),
is_error: true,
});
continue;
}
let _ = stdin.flush();
let mut line = String::new();
if let Ok(n) = reader.read_line(&mut line) {
if n == 0 {
if let Ok(mut lock) = child_ref_for_thread.lock() {
if let Some(mut c) = lock.take() {
let _ = c.kill();
let _ = c.wait();
}
}
let _ = job.resp.send(ScriptResult {
value: "Worker closed".to_string(),
is_error: true,
});
break;
}
if let Ok(v) = serde_json::from_str::<serde_json::Value>(&line) {
let val = v
.get("value")
.and_then(|x| x.as_str())
.unwrap_or("")
.to_string();
let is_err = v.get("is_error").and_then(|x| x.as_bool()).unwrap_or(true);
let _ = job.resp.send(ScriptResult {
value: val,
is_error: is_err,
});
} else {
let _ = job.resp.send(ScriptResult {
value: format!("Malformed worker response: {}", line),
is_error: true,
});
}
} else {
let _ = job.resp.send(ScriptResult {
value: "Failed to read worker response".to_string(),
is_error: true,
});
}
}
if let Ok(mut lock) = child_ref_for_thread.lock() {
if let Some(mut c) = lock.take() {
let _ = c.kill();
let _ = c.wait();
}
}
});
(tx, handle, child_ref)
}
fn parse_file_line_col(s: &str) -> Option<(String, u32, u32)> {
let parts: Vec<&str> = s.rsplitn(3, ':').collect();
if parts.len() >= 2 {
if let (Ok(c), Ok(r)) = (
parts[0].trim().trim_end_matches(')').parse::<u32>(),
parts[1].trim().parse::<u32>(),
) {
let src = if parts.len() == 3 {
parts[2].trim().trim_start_matches("at ").trim().to_string()
} else {
"".to_string()
};
return Some((src, r, c));
}
}
None
}
fn parse_stack_info(stack: Option<&str>) -> (Option<String>, Option<u32>, Option<u32>) {
if let Some(s) = stack {
for l in s.lines() {
let l = l.trim();
if let (Some(open), Some(close)) = (l.rfind('('), l.rfind(')')) {
if open < close {
let inside = &l[open + 1..close];
if let Some((src, ln, col)) = parse_file_line_col(inside) {
return (Some(src), Some(ln), Some(col));
}
}
}
if let Some(atpos) = l.find('@') {
let after = &l[atpos + 1..];
if let Some((src, ln, col)) = parse_file_line_col(after) {
return (Some(src), Some(ln), Some(col));
}
}
if let Some((src, ln, col)) = parse_file_line_col(l) {
return (Some(src), Some(ln), Some(col));
}
}
}
(None, None, None)
}
pub struct RFEngine {
client: Client,
config: EngineConfig,
last_html: Option<String>,
last_url: Option<String>,
styles: Vec<String>,
scratch_json: String,
scratch_styles: String,
on_load: Option<OnLoadHandler>,
on_console: Option<OnConsoleHandler>,
on_request: Option<OnRequestHandler>,
async_runtime: Option<tokio::runtime::Runtime>,
stylesheet_sem: Option<std::sync::Arc<tokio::sync::Semaphore>>,
async_client: Option<reqwest::Client>,
css_cache: Option<std::sync::Arc<Mutex<CssCache>>>,
script_worker_tx: Option<std::sync::mpsc::Sender<ScriptJob>>,
script_worker_handle: Option<std::thread::JoinHandle<()>>,
script_worker_child: Option<std::sync::Arc<std::sync::Mutex<Option<std::process::Child>>>>,
page_worker_tx: Option<std::sync::mpsc::Sender<ScriptJob>>,
page_worker_handle: Option<std::thread::JoinHandle<()>>,
page_worker_child: Option<std::sync::Arc<std::sync::Mutex<Option<std::process::Child>>>>,
}
impl RFEngine {
fn extract_styles(&mut self, base_url: &str) {
if self.last_html.is_none() {
return;
}
let html = self.last_html.as_ref().unwrap();
let document = Html::parse_document(html);
let style_sel = style_selector();
for node in document.select(style_sel) {
let txt = node.text().collect::<String>();
if !txt.trim().is_empty() {
self.styles.push(txt);
}
}
let link_sel = link_stylesheet_selector();
let hrefs: Vec<String> = document
.select(link_sel)
.filter_map(|node| node.value().attr("href").map(|s| s.to_string()))
.collect();
if !hrefs.is_empty() {
let css_urls: Vec<String> = hrefs
.into_iter()
.map(|href| {
if let Ok(base) = url::Url::parse(base_url) {
base.join(&href)
.map(|u| u.to_string())
.unwrap_or_else(|_| href.clone())
} else {
href.clone()
}
})
.collect();
let sem_opt = self.stylesheet_sem.clone();
let concurrency = self.config.stylesheet_fetch_concurrency;
let client_opt = self.async_client.clone();
let enable_preconnect = self.config.enable_preconnect;
let cache_arc_opt = self.css_cache.clone();
let fetch_fut = async move {
let client = match client_opt {
Some(ac) => ac,
None => reqwest::Client::new(),
};
if enable_preconnect {
use std::collections::HashSet;
let mut seen = HashSet::new();
let mut head_urls = Vec::new();
for u in css_urls.iter() {
if let Ok(parsed) = url::Url::parse(u) {
let host_key = format!(
"{}:{}:{}",
parsed.scheme(),
parsed.host_str().unwrap_or_default(),
parsed.port_or_known_default().unwrap_or(0)
);
if !seen.contains(&host_key) {
seen.insert(host_key);
head_urls.push(u.clone());
}
}
}
if !head_urls.is_empty() {
let head_count = head_urls.len();
let head_stream = futures::stream::iter(head_urls)
.map(|u| {
let c = client.clone();
async move {
let _ = c.head(&u).send().await;
}
})
.buffer_unordered(std::cmp::min(4usize, head_count));
let _ = head_stream.collect::<Vec<_>>().await;
}
}
let cache_opt = cache_arc_opt.clone();
let stream = futures::stream::iter(css_urls)
.map(move |u| {
let c = client.clone();
let sem = sem_opt.clone();
let cache = cache_opt.clone();
async move {
if let Some(cache_arc) = &cache {
if let Ok(mut lock) = cache_arc.lock() {
if let Some(v) = lock.get(&u) {
return Some(v);
}
}
}
let _permit = match sem {
Some(s) => Some(s.acquire_owned().await.ok()),
None => None,
};
match c.get(&u).send().await {
Ok(resp) => match resp.text().await {
Ok(t) => {
if t.trim().is_empty() {
None
} else {
if let Some(cache_arc) = &cache {
if let Ok(mut lock) = cache_arc.lock() {
lock.insert(u.clone(), t.clone());
}
}
Some(t)
}
}
Err(_) => None,
},
Err(_) => None,
}
}
})
.buffer_unordered(concurrency);
stream.collect::<Vec<_>>().await
};
if !self.config.wait_for_stylesheets_on_load {
if let Some(rt) = &self.async_runtime {
std::mem::drop(rt.spawn(fetch_fut));
} else {
std::thread::spawn(move || {
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.expect("failed to build runtime");
let _ = rt.block_on(fetch_fut);
});
}
} else {
let results: Vec<Option<String>> = if let Some(rt) = &self.async_runtime {
rt.block_on(fetch_fut)
} else if let Ok(handle) = tokio::runtime::Handle::try_current() {
handle.block_on(fetch_fut)
} else {
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.expect("failed to build runtime");
rt.block_on(fetch_fut)
};
for opt in results.into_iter().flatten() {
self.styles.push(opt);
}
}
}
}
fn serialize_elements_stream(&mut self, document: &Html) -> String {
self.scratch_json.clear();
self.scratch_json.push('[');
let root = document.root_element();
let mut stack: Vec<(scraper::element_ref::ElementRef, Option<usize>)> = vec![(root, None)];
let mut first = true;
let mut idx: usize = 0;
let mut text_buf = String::new();
let mut attrs_s = String::new();
while let Some((node, parent_idx)) = stack.pop() {
if !first {
self.scratch_json.push(',');
} else {
first = false;
}
let tag_js =
serde_json::to_string(node.value().name()).unwrap_or_else(|_| "\"\"".to_string());
let id_js = serde_json::to_string(node.value().attr("id").unwrap_or(""))
.unwrap_or_else(|_| "\"\"".to_string());
let class_js = serde_json::to_string(node.value().attr("class").unwrap_or(""))
.unwrap_or_else(|_| "\"\"".to_string());
text_buf.clear();
for t in node.text() {
text_buf.push_str(t);
}
let text_js = serde_json::to_string(&text_buf).unwrap_or_else(|_| "\"\"".to_string());
attrs_s.clear();
attrs_s.push('[');
let mut first_attr = true;
for (k, v) in node.value().attrs() {
if !first_attr {
attrs_s.push(',');
} else {
first_attr = false;
}
let k_js = serde_json::to_string(k).unwrap_or_else(|_| "\"\"".to_string());
let v_js = serde_json::to_string(v).unwrap_or_else(|_| "\"\"".to_string());
attrs_s.push('[');
attrs_s.push_str(&k_js);
attrs_s.push(',');
attrs_s.push_str(&v_js);
attrs_s.push(']');
}
attrs_s.push(']');
let parent_js = if let Some(p) = parent_idx {
p.to_string()
} else {
"null".to_string()
};
self.scratch_json.push_str("{\"tag\":");
self.scratch_json.push_str(&tag_js);
self.scratch_json.push_str(",\"id\":");
self.scratch_json.push_str(&id_js);
self.scratch_json.push_str(",\"class\":");
self.scratch_json.push_str(&class_js);
self.scratch_json.push_str(",\"text\":");
self.scratch_json.push_str(&text_js);
self.scratch_json.push_str(",\"attributes\":");
self.scratch_json.push_str(&attrs_s);
self.scratch_json.push_str(",\"parent\":");
self.scratch_json.push_str(&parent_js);
self.scratch_json.push('}');
let children: Vec<_> = node
.children()
.filter_map(scraper::ElementRef::wrap)
.collect();
for child in children.into_iter().rev() {
stack.push((child, Some(idx)));
}
idx += 1;
}
self.scratch_json.push(']');
self.scratch_json.clone()
}
fn serialize_styles_array(&mut self) -> String {
self.scratch_styles.clear();
self.scratch_styles.push('[');
let mut first = true;
for s in &self.styles {
if !first {
self.scratch_styles.push(',');
} else {
first = false;
}
let s_js = serde_json::to_string(s).unwrap_or_else(|_| "\"\"".to_string());
self.scratch_styles.push_str(&s_js);
}
self.scratch_styles.push(']');
self.scratch_styles.clone()
}
}
impl Engine for RFEngine {
fn new(config: EngineConfig) -> Result<Self>
where
Self: Sized,
{
let client = Client::builder()
.timeout(Duration::from_millis(config.timeout_ms))
.build()
.map_err(|e| {
Error::InitializationError(format!("Failed to build HTTP client: {}", e))
})?;
let mut async_runtime = None;
let mut stylesheet_sem = None;
if config.enable_persistent_runtime {
let rt = tokio::runtime::Builder::new_multi_thread()
.worker_threads(4)
.enable_all()
.build()
.expect("failed to create runtime");
stylesheet_sem = Some(std::sync::Arc::new(tokio::sync::Semaphore::new(
config.stylesheet_fetch_concurrency,
)));
async_runtime = Some(rt);
}
let async_client = Some(
reqwest::Client::builder()
.pool_max_idle_per_host(std::cmp::max(4, config.stylesheet_fetch_concurrency))
.tcp_keepalive(Some(Duration::from_secs(60)))
.build()
.expect("failed to build async client"),
);
let mut script_worker_tx = None;
let mut script_worker_handle = None;
let mut script_worker_child = None;
if config.enable_javascript && !config.enable_js_isolation {
if config.use_process_worker {
let (tx, handle, child_ref) = spawn_process_worker();
script_worker_tx = Some(tx);
script_worker_handle = Some(handle);
script_worker_child = Some(child_ref);
} else {
let (tx, handle) = spawn_script_worker();
script_worker_tx = Some(tx);
script_worker_handle = Some(handle);
}
}
Ok(Self {
client,
config,
last_html: None,
last_url: None,
styles: Vec::new(),
scratch_json: String::with_capacity(4096),
scratch_styles: String::with_capacity(1024),
on_load: None,
on_console: None,
on_request: None,
async_runtime,
stylesheet_sem,
async_client,
css_cache: Some(std::sync::Arc::new(Mutex::new(CssCache::new(
128,
Duration::from_millis(5_000),
)))),
script_worker_tx,
script_worker_handle,
script_worker_child,
page_worker_tx: None,
page_worker_handle: None,
page_worker_child: None,
})
}
fn load_url(&mut self, url: &str) -> Result<()> {
let resp = self
.client
.get(url)
.header("User-Agent", self.config.user_agent.clone())
.send()
.map_err(|e| Error::LoadError(format!("Failed to fetch {}: {}", url, e)))?;
let body = resp
.text()
.map_err(|e| Error::LoadError(format!("Failed to read response body: {}", e)))?;
self.last_html = Some(body);
self.last_url = Some(url.to_string());
self.styles.clear();
self.extract_styles(url);
if self.config.enable_javascript && self.config.enable_js_isolation {
if let Some(tx) = self.page_worker_tx.take() {
drop(tx);
}
if let Some(h) = self.page_worker_handle.take() {
let _ = h.join();
}
let (tx, handle, child_ref) = if self.config.use_process_worker {
let (t, h, c) = spawn_process_worker();
(t, h, Some(c))
} else {
let (t, h) = spawn_script_worker();
(t, h, None)
};
let html_ref: &str = self.last_html.as_deref().unwrap_or("");
let document = Html::parse_document(html_ref);
let elements_json = self.serialize_elements_stream(&document);
let styles_json = self.serialize_styles_array();
let title = document
.select(&Selector::parse("title").unwrap())
.next()
.map(|n| n.text().collect::<String>())
.unwrap_or_default();
let body_text = document
.select(&Selector::parse("body").unwrap())
.next()
.map(|n| n.text().collect::<String>())
.unwrap_or_default();
let harness = include_str!("rf_harness.js")
.replace("__RFOX_ELEMENTS__", &elements_json)
.replace("__RFOX_STYLES__", &styles_json)
.replace(
"__RFOX_TITLE__",
&serde_json::to_string(&title).unwrap_or_else(|_| "\"\"".to_string()),
)
.replace(
"__RFOX_BODY__",
&serde_json::to_string(&body_text).unwrap_or_else(|_| "\"\"".to_string()),
);
let (resp_tx, resp_rx) = std::sync::mpsc::channel::<ScriptResult>();
let job = ScriptJob {
code: harness,
loop_limit: self.config.script_loop_iteration_limit,
recursion_limit: self.config.script_recursion_limit,
on_console: self.on_console.clone(),
resp: resp_tx,
};
let _ = tx.send(job);
let _ = resp_rx.recv_timeout(std::time::Duration::from_millis(
self.config.script_timeout_ms,
));
self.page_worker_tx = Some(tx);
self.page_worker_handle = Some(handle);
self.page_worker_child = child_ref;
}
if let Some(cb) = &self.on_load {
if let Ok(snapshot) = self.render_text_snapshot() {
cb(&snapshot);
}
}
Ok(())
}
fn render_text_snapshot(&self) -> Result<TextSnapshot> {
let html = self
.last_html
.as_ref()
.ok_or_else(|| Error::RenderError("No document loaded".into()))?;
let document = Html::parse_document(html);
let title = document
.select(title_selector())
.next()
.map(|n| n.text().collect::<String>())
.unwrap_or_default();
let text = document
.select(body_selector())
.next()
.map(|b| b.text().collect::<String>())
.unwrap_or_default();
Ok(TextSnapshot {
title,
text,
url: self.last_url.clone().unwrap_or_default(),
})
}
fn render_png(&self) -> Result<Vec<u8>> {
Err(Error::RenderError(
"Screenshots are not supported by RFEngine".into(),
))
}
fn evaluate_script(&mut self, script: &str) -> Result<ScriptResult> {
if !self.config.enable_javascript {
return Err(Error::ScriptError(
"JavaScript is disabled in config".into(),
));
}
let html = self
.last_html
.as_ref()
.ok_or_else(|| Error::ScriptError("No document loaded".into()))?;
let document = Html::parse_document(html);
let title = document
.select(&Selector::parse("title").unwrap())
.next()
.map(|n| n.text().collect::<String>())
.unwrap_or_default();
let body_text = document
.select(&Selector::parse("body").unwrap())
.next()
.map(|n| n.text().collect::<String>())
.unwrap_or_default();
let elements_json = self.serialize_elements_stream(&document);
let styles_json = self.serialize_styles_array();
let harness = include_str!("rf_harness.js")
.replace("__RFOX_ELEMENTS__", &elements_json)
.replace("__RFOX_STYLES__", &styles_json)
.replace(
"__RFOX_TITLE__",
&serde_json::to_string(&title).unwrap_or_else(|_| "\"\"".to_string()),
)
.replace(
"__RFOX_BODY__",
&serde_json::to_string(&body_text).unwrap_or_else(|_| "\"\"".to_string()),
);
use std::collections::HashMap;
use std::sync::mpsc::channel;
use std::sync::{Arc, Mutex, OnceLock};
use std::thread;
#[allow(clippy::type_complexity)]
static RFOX_CONSOLE_REG: OnceLock<
Mutex<HashMap<usize, Arc<dyn Fn(&crate::ConsoleMessage) + Send + Sync>>>,
> = OnceLock::new();
let on_console_cb = self.on_console.clone();
let loop_limit = self.config.script_loop_iteration_limit;
let recursion_limit = self.config.script_recursion_limit;
let timeout_ms = self.config.script_timeout_ms;
let code = format!(
"{}\n;\n(function(){{try{{return ({});}}catch(e){{throw e;}}}})()",
harness, script
);
let worker_tx_opt = if self.config.enable_js_isolation {
self.page_worker_tx.as_ref()
} else {
self.script_worker_tx.as_ref()
};
if let Some(tx) = worker_tx_opt {
let (job_tx, job_rx) = std::sync::mpsc::channel::<ScriptResult>();
let job = ScriptJob {
code,
loop_limit,
recursion_limit,
on_console: on_console_cb.clone(),
resp: job_tx,
};
if let Err(e) = tx.send(job) {
return Ok(ScriptResult {
value: format!("Failed to queue script job: {}", e),
is_error: true,
});
}
match job_rx.recv_timeout(std::time::Duration::from_millis(timeout_ms)) {
Ok(r) => Ok(r),
Err(std::sync::mpsc::RecvTimeoutError::Timeout) => {
if let Some(cb) = &self.on_console {
cb(&crate::ConsoleMessage {
level: "error".into(),
text: format!("Script timed out after {}ms", timeout_ms),
source: None,
line: None,
column: None,
stack: None,
});
}
Ok(ScriptResult {
value: format!("Script timed out after {}ms", timeout_ms),
is_error: true,
})
}
Err(e) => Ok(ScriptResult {
value: format!("Script execution failed to receive result: {}", e),
is_error: true,
}),
}
} else {
let (tx, rx) = channel();
thread::spawn(move || {
let mut ctx: boa_engine::Context = boa_engine::Context::default();
if loop_limit > 0 {
ctx.runtime_limits_mut()
.set_loop_iteration_limit(loop_limit);
}
if recursion_limit < usize::MAX {
ctx.runtime_limits_mut()
.set_recursion_limit(recursion_limit);
}
fn rfox_console_native(
_this: &boa_engine::JsValue,
args: &[boa_engine::JsValue],
ctx: &mut boa_engine::Context,
) -> boa_engine::JsResult<boa_engine::JsValue> {
let ptr = ctx as *const _ as usize;
let map = RFOX_CONSOLE_REG.get_or_init(|| {
Mutex::new(HashMap::<
usize,
Arc<dyn Fn(&crate::ConsoleMessage) + Send + Sync>,
>::new())
});
if let Ok(lock) = map.lock() {
if let Some(cb) = lock.get(&ptr) {
let text = args
.first()
.map(|a| format!("{}", a.display()))
.unwrap_or_default();
let stack = args
.get(1)
.map(|a| format!("{}", a.display()))
.filter(|s| !s.is_empty());
let (source, line_no, col_no) = parse_stack_info(stack.as_deref());
cb(&crate::ConsoleMessage {
level: "log".to_string(),
text,
source,
line: line_no,
column: col_no,
stack,
});
}
}
Ok(boa_engine::JsValue::undefined())
}
if let Some(cb_ref) = &on_console_cb {
let cb = cb_ref.clone();
let ptr = &ctx as *const _ as usize;
let map = RFOX_CONSOLE_REG.get_or_init(|| {
Mutex::new(HashMap::<
usize,
Arc<dyn Fn(&crate::ConsoleMessage) + Send + Sync>,
>::new())
});
let nf = boa_engine::native_function::NativeFunction::from_fn_ptr(
rfox_console_native as boa_engine::native_function::NativeFunctionPointer,
);
let _ = ctx.register_global_builtin_callable(
boa_engine::js_string!("__rfox_console_log"),
0usize,
nf,
);
let nf2 = boa_engine::native_function::NativeFunction::from_fn_ptr(
rfox_console_native as boa_engine::native_function::NativeFunctionPointer,
);
let _ = ctx.register_global_builtin_callable(
boa_engine::js_string!("__rfox_console_error"),
0usize,
nf2,
);
if let Ok(mut lock) = map.lock() {
lock.insert(ptr, cb);
}
}
let result = match ctx.eval(boa_engine::Source::from_bytes(code.as_bytes())) {
Ok(val) => {
if let Ok(cmsg) = ctx.eval(boa_engine::Source::from_bytes(
"__rfox_console.join('\n')".as_bytes(),
)) {
let console_text = format!("{}", cmsg.display());
if !console_text.is_empty() {
for line in console_text.split('\n') {
if let Some(cb) = &on_console_cb {
let cm = crate::ConsoleMessage {
level: "log".to_string(),
text: line.to_string(),
source: None,
line: None,
column: None,
stack: None,
};
cb(&cm);
}
}
}
}
Ok(ScriptResult {
value: format!("{}", val.display()),
is_error: false,
})
}
Err(e) => {
if let Ok(cmsg) = ctx.eval(boa_engine::Source::from_bytes(
"__rfox_console.join('\n')".as_bytes(),
)) {
let console_text = format!("{}", cmsg.display());
if !console_text.is_empty() {
for line in console_text.split('\n') {
if let Some(cb) = &on_console_cb {
let cm = crate::ConsoleMessage {
level: "error".to_string(),
text: line.to_string(),
source: None,
line: None,
column: None,
stack: None,
};
cb(&cm);
}
}
}
}
let err_msg = format!("Script thrown: {}", e);
Ok(ScriptResult {
value: err_msg,
is_error: true,
})
}
};
let ptr = &ctx as *const _ as usize;
let map = RFOX_CONSOLE_REG.get_or_init(|| {
Mutex::new(HashMap::<
usize,
Arc<dyn Fn(&crate::ConsoleMessage) + Send + Sync>,
>::new())
});
if let Ok(mut lock) = map.lock() {
lock.remove(&ptr);
}
let _ = tx.send(result);
});
match rx.recv_timeout(std::time::Duration::from_millis(timeout_ms)) {
Ok(r) => r,
Err(std::sync::mpsc::RecvTimeoutError::Timeout) => {
if let Some(cb) = &self.on_console {
cb(&crate::ConsoleMessage {
level: "error".into(),
text: format!("Script timed out after {}ms", timeout_ms),
source: None,
line: None,
column: None,
stack: None,
});
}
Ok(ScriptResult {
value: format!("Script timed out after {}ms", timeout_ms),
is_error: true,
})
}
Err(e) => Ok(ScriptResult {
value: format!("Script execution failed to receive result: {}", e),
is_error: true,
}),
}
}
}
fn evaluate_script_in_page(&mut self, script: &str) -> Result<ScriptResult> {
self.evaluate_script(script)
}
fn on_load<F>(&mut self, cb: F)
where
F: Fn(&crate::TextSnapshot) + Send + Sync + 'static,
{
self.on_load = Some(Arc::new(cb));
}
fn clear_on_load(&mut self) {
self.on_load = None;
}
fn on_console<F>(&mut self, cb: F)
where
F: Fn(&crate::ConsoleMessage) + Send + Sync + 'static,
{
self.on_console = Some(Arc::new(cb));
}
fn clear_on_console(&mut self) {
self.on_console = None;
}
fn on_request<F>(&mut self, cb: F)
where
F: Fn(&crate::RequestInfo) -> crate::RequestAction + Send + Sync + 'static,
{
self.on_request = Some(Arc::new(cb));
}
fn clear_on_request(&mut self) {
self.on_request = None;
}
fn get_cookies(&self) -> Result<Vec<crate::Cookie>> {
Ok(vec![])
}
fn set_cookies(&mut self, _cookies: Vec<crate::CookieParam>) -> Result<()> {
Ok(())
}
fn delete_cookie(
&mut self,
_name: &str,
_url: Option<&str>,
_domain: Option<&str>,
_path: Option<&str>,
) -> Result<()> {
Ok(())
}
fn clear_cookies(&mut self) -> Result<()> {
Ok(())
}
fn close(self) -> Result<()> {
if let Some(tx) = self.script_worker_tx {
drop(tx);
}
if let Some(h) = self.script_worker_handle {
let _ = h.join();
}
if let Some(tx) = self.page_worker_tx {
drop(tx);
}
if let Some(h) = self.page_worker_handle {
let _ = h.join();
}
if let Some(rt) = self.async_runtime {
drop(rt);
}
if let Some(_sem) = self.stylesheet_sem {
}
Ok(())
}
}
impl RFEngine {
pub fn abort_running_script(&mut self) -> Result<()> {
if let Some(old_tx) = self.script_worker_tx.take() {
drop(old_tx);
}
if let Some(child_ref) = self.script_worker_child.take() {
if let Ok(mut lock) = child_ref.lock() {
if let Some(mut c) = lock.take() {
let _ = c.kill();
let _ = c.wait();
}
}
}
if let Some(h) = self.script_worker_handle.take() {
let _ = h.join();
}
if self.config.enable_javascript && !self.config.enable_js_isolation {
let (tx, h, _child_ref) = if self.config.use_process_worker {
let (t, h, c) = spawn_process_worker();
(t, h, Some(c))
} else {
let (t, h) = spawn_script_worker();
(t, h, None)
};
self.script_worker_tx = Some(tx);
self.script_worker_handle = Some(h);
self.script_worker_child = _child_ref;
}
if let Some(old_tx) = self.page_worker_tx.take() {
drop(old_tx);
}
if let Some(child_ref) = self.page_worker_child.take() {
if let Ok(mut lock) = child_ref.lock() {
if let Some(mut c) = lock.take() {
let _ = c.kill();
let _ = c.wait();
}
}
}
if let Some(h) = self.page_worker_handle.take() {
let _ = h.join();
}
if self.config.enable_javascript
&& self.config.enable_js_isolation
&& self.last_html.is_some()
{
let (tx, h, child_ref) = if self.config.use_process_worker {
let (t, h, c) = spawn_process_worker();
(t, h, Some(c))
} else {
let (t, h) = spawn_script_worker();
(t, h, None)
};
let html = self.last_html.clone().unwrap_or_default();
let document = Html::parse_document(&html);
let mut elements = Vec::new();
let root = document.root_element();
let mut stack: Vec<(scraper::element_ref::ElementRef, Option<usize>)> =
vec![(root, None)];
while let Some((node, parent_idx)) = stack.pop() {
let tag = node.value().name().to_string();
let id = node
.value()
.attr("id")
.map(|s| s.to_string())
.unwrap_or_default();
let class = node
.value()
.attr("class")
.map(|s| s.to_string())
.unwrap_or_default();
let text = node.text().collect::<String>();
let attrs = node
.value()
.attrs()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect::<Vec<_>>();
let idx = elements.len();
elements.push(serde_json::json!({"tag": tag, "id": id, "class": class, "text": text, "attributes": attrs, "parent": parent_idx}));
let children: Vec<_> = node
.children()
.filter_map(scraper::ElementRef::wrap)
.collect();
for child in children.into_iter().rev() {
stack.push((child, Some(idx)));
}
}
let elements_json = self.serialize_elements_stream(&document);
let styles_json = self.serialize_styles_array();
let title = document
.select(&Selector::parse("title").unwrap())
.next()
.map(|n| n.text().collect::<String>())
.unwrap_or_default();
let body_text = document
.select(&Selector::parse("body").unwrap())
.next()
.map(|n| n.text().collect::<String>())
.unwrap_or_default();
let harness = include_str!("rf_harness.js")
.replace("__RFOX_ELEMENTS__", &elements_json)
.replace("__RFOX_STYLES__", &styles_json)
.replace(
"__RFOX_TITLE__",
&serde_json::to_string(&title).unwrap_or_else(|_| "\"\"".to_string()),
)
.replace(
"__RFOX_BODY__",
&serde_json::to_string(&body_text).unwrap_or_else(|_| "\"\"".to_string()),
);
let (resp_tx, resp_rx) = std::sync::mpsc::channel::<ScriptResult>();
let job = ScriptJob {
code: harness,
loop_limit: self.config.script_loop_iteration_limit,
recursion_limit: self.config.script_recursion_limit,
on_console: self.on_console.clone(),
resp: resp_tx,
};
let _ = tx.send(job);
let _ = resp_rx.recv_timeout(std::time::Duration::from_millis(
self.config.script_timeout_ms,
));
self.page_worker_tx = Some(tx);
self.page_worker_handle = Some(h);
self.page_worker_child = child_ref;
}
Ok(())
}
pub fn snapshot_page_context(&mut self) -> Result<String> {
let res = self.evaluate_script("__rfox_snapshot()")?;
Ok(res.value)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_rfengine_load_and_eval() {
if std::env::var("CI").is_ok() {
return;
}
let server = tiny_http::Server::http("0.0.0.0:0").unwrap();
let addr = server.server_addr();
std::thread::spawn(move || {
if let Ok(request) = server.recv() {
let response = tiny_http::Response::from_string(
"<html><head><title>RF</title><style>body{color:red}</style></head><body><div id=\"hello\" class=\"greeting\">Hello RF</div></body></html>",
);
let _ = request.respond(response);
}
});
let url = format!("http://{}", addr);
let mut engine =
RFEngine::new(crate::EngineConfig::default()).expect("Failed to create RFEngine");
engine.load_url(&url).expect("Failed to load URL");
let snap = engine
.render_text_snapshot()
.expect("Failed to render snapshot");
assert!(snap.title.contains("RF"));
assert!(snap.text.contains("Hello RF"));
if engine.config.enable_javascript {
let res = engine
.evaluate_script("document.title")
.expect("Eval failed");
assert!(res.value.contains("RF"));
let res2 = engine
.evaluate_script("document.querySelector('#hello').textContent()")
.expect("Eval failed");
assert!(res2.value.contains("Hello"));
let res_missing = engine
.evaluate_script("document.querySelector('#nope').textContent()")
.expect("Eval failed");
println!(
"missing -> value='{}' is_error={}",
res_missing.value, res_missing.is_error
);
let mut v = res_missing.value.trim().to_string();
if v.len() >= 2 && v.starts_with('"') && v.ends_with('"') {
v = v[1..v.len() - 1].to_string();
}
assert!(v.is_empty() || v == "null" || v == "undefined");
let dom_dump = engine
.evaluate_script("JSON.stringify(__rfox_dom)")
.expect("DOM dump failed");
println!("__rfox_dom: {}", dom_dump.value);
let attr = engine
.evaluate_script("document.querySelector('#hello').getAttribute('class')")
.expect("Eval failed");
assert!(attr.value.contains("greeting"));
let res_dt = engine.evaluate_script("(()=>{ document.querySelector('#hello').setAttribute('data-test','42'); return document.querySelector('#hello').getAttribute('data-test'); })()").expect("Eval failed");
assert!(res_dt.value.contains("42"));
let captured = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
let c_clone = captured.clone();
let flag = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
let f_clone = flag.clone();
engine.on_console(move |m| {
f_clone.store(true, std::sync::atomic::Ordering::SeqCst);
if let Ok(mut v) = c_clone.lock() {
v.push(format!(
"{}||{}",
m.text.clone(),
m.stack.clone().unwrap_or_default()
));
}
});
let _ = engine
.evaluate_script("(()=>{ console.log('from-js'); return 'ok'; })()")
.expect("Eval failed");
assert!(flag.load(std::sync::atomic::Ordering::SeqCst));
if let Ok(v) = captured.lock() {
assert!(v.iter().any(|s| {
let parts: Vec<&str> = s.split("||").collect();
if parts.len() == 2 {
let head = parts[0].trim().trim_matches('"');
let tail = parts[1].trim().trim_matches('"');
head == "from-js" && !tail.is_empty()
} else {
false
}
}));
}
let res_inline = engine
.evaluate_script(
"(()=>{ console.log('inline'); return __rfox_console.join('\\n'); })()",
)
.expect("inline eval failed");
println!("inline console eval: {}", res_inline.value);
}
}
#[test]
fn test_parse_stack_variants() {
let v8 = "Error\n at Object.<anonymous> (/path/to/file.js:10:15)\n at other";
let (src, line, col) = super::parse_stack_info(Some(v8));
assert!(src.unwrap_or_default().contains("/path/to/file.js"));
assert_eq!(line, Some(10));
assert_eq!(col, Some(15));
let ff = "func@http://localhost/script.js:20:5\nanother";
let (src2, line2, col2) = super::parse_stack_info(Some(ff));
assert!(src2.unwrap_or_default().contains("script.js"));
assert_eq!(line2, Some(20));
assert_eq!(col2, Some(5));
let minimal = "file.js:30:3";
let (_s3, l3, c3) = super::parse_stack_info(Some(minimal));
assert_eq!(l3, Some(30));
assert_eq!(c3, Some(3));
}
#[test]
fn test_element_api_and_computed_style() {
if std::env::var("CI").is_ok() {
return;
}
let server = tiny_http::Server::http("0.0.0.0:0").unwrap();
let addr = server.server_addr();
std::thread::spawn(move || {
if let Ok(request) = server.recv() {
let response = tiny_http::Response::from_string(
"<html><head><title>RF</title><style>body{color:blue}.greeting{color:green}#hello{color:red;font-size:12px}</style></head><body><div id=\"hello\" class=\"greeting\">Hello RF</div></body></html>",
);
let _ = request.respond(response);
}
});
let url = format!("http://{}", addr);
let mut engine =
RFEngine::new(crate::EngineConfig::default()).expect("Failed to create RFEngine");
engine.load_url(&url).expect("Failed to load URL");
if engine.config.enable_javascript {
let ds = engine.evaluate_script("(()=>{ var el=document.querySelector('#hello'); el.setAttribute('data-foo','bar'); return el.dataset.foo; })()").expect("Eval failed");
assert!(ds.value.contains("bar"));
let cls = engine.evaluate_script("(()=>{ var el=document.querySelector('#hello'); el.classList.add('x'); var a=el.getAttribute('class'); el.classList.remove('x'); return a; })()").expect("Eval failed");
assert!(cls.value.contains("x"));
let contains = engine.evaluate_script("(()=>{ var el=document.querySelector('#hello'); el.classList.add('y'); return el.classList.contains('y'); })()").expect("Eval failed");
assert!(contains.value.contains("true"));
let ih = engine.evaluate_script("(()=>{ var el=document.querySelector('#hello'); el.innerHTML('<b>Bold</b>'); return el.innerHTML(); })()").expect("Eval failed");
println!("ih -> {}", ih.value);
assert!(ih.value.contains("Bold"));
let ds_set = engine.evaluate_script("(()=>{ var el=document.querySelector('#hello'); el.dataset.set('foo','baz'); return el.getAttribute('data-foo'); })()").expect("Eval failed");
assert!(ds_set.value.contains("baz"));
let cls = engine.evaluate_script("(()=>{ var el=document.querySelector('#hello'); el.classList.add('x'); var a=el.getAttribute('class'); var len=el.classList.length(); el.classList.remove('x'); return JSON.stringify({class:a,len:len}); })()").expect("Eval failed");
assert!(cls.value.contains("x"));
assert!(cls.value.contains("len"));
let spec = engine.evaluate_script("(()=>{ return getComputedStyle(document.querySelector('#hello')).getPropertyValue('color'); })()").expect("Eval failed");
assert!(spec.value.contains("#ff0000"));
}
}
#[test]
fn test_script_timeout_and_runtime_limits() {
if std::env::var("CI").is_ok() {
return;
}
let mut engine =
RFEngine::new(crate::EngineConfig::default()).expect("Failed to create RFEngine");
let server = tiny_http::Server::http("0.0.0.0:0").unwrap();
let addr = server.server_addr();
std::thread::spawn(move || {
if let Ok(request) = server.recv() {
let response = tiny_http::Response::from_string(
"<html><head><title>RF</title></head><body></body></html>",
);
let _ = request.respond(response);
}
});
let url = format!("http://{}", addr);
engine.load_url(&url).expect("Failed to load URL");
engine.config.script_timeout_ms = 10;
if engine.config.enable_javascript {
let res = engine
.evaluate_script("(()=>{ while(true){} })() ")
.expect("Eval failed");
assert!(res.is_error);
assert!(
res.value.to_lowercase().contains("timed out")
|| res.value.to_lowercase().contains("loop")
|| res.value.to_lowercase().contains("thrown")
);
}
engine.config.script_timeout_ms = 5000;
engine.config.script_loop_iteration_limit = 100;
if engine.config.enable_javascript {
let res2 = engine
.evaluate_script("(()=>{ var i=0; while(true) { i++; } })() ")
.expect("Eval failed");
assert!(res2.is_error);
assert!(
res2.value.to_lowercase().contains("loop")
|| res2.value.to_lowercase().contains("thrown")
);
}
}
#[test]
fn test_microtasks_and_timers() {
if std::env::var("CI").is_ok() {
return;
}
let server = tiny_http::Server::http("0.0.0.0:0").unwrap();
let addr = server.server_addr();
std::thread::spawn(move || {
let mut i = 0;
while let Ok(request) = server.recv() {
let response = if i == 0 {
tiny_http::Response::from_string(
"<html><head><title>RF</title></head><body></body></html>",
)
} else {
tiny_http::Response::from_string("<html><head><title>RF2</title></head><body><div id=\"x\">B</div></body></html>")
};
let _ = request.respond(response);
i += 1;
if i >= 2 {
break;
}
}
});
let url = format!("http://{}", addr);
let mut engine =
RFEngine::new(crate::EngineConfig::default()).expect("Failed to create RFEngine");
engine.load_url(&url).expect("Failed to load URL");
if engine.config.enable_javascript {
let res = engine.evaluate_script("(()=>{ var out=[]; queueMicrotask(function(){ out.push('m'); console.log('micro'); }); setTimeout(function(){ out.push('t'); console.log('timer'); }, 0); __rfox_run_until_idle(); return out.join(','); })()").expect("Eval failed");
assert!(res.value.contains("m") && res.value.contains("t"));
let res2 = engine.evaluate_script("(()=>{ var out=[]; var id=setTimeout(function(){ out.push('x'); }, 0); clearTimeout(id); __rfox_run_until_idle(); return out.join(','); })()").expect("Eval failed");
let mut v = res2.value.trim().to_string();
if v.len() >= 2 && v.starts_with('"') && v.ends_with('"') {
v = v[1..v.len() - 1].to_string();
}
assert!(v.is_empty());
let res3 = engine.evaluate_script("(()=>{ var out=[]; var id=setInterval(function(){ out.push('i'); if (out.length>=2) { clearInterval(id); } }, 0); __rfox_run_until_idle(); return out.join(','); })()").expect("Eval failed");
assert!(res3.value.contains("i,i") || res3.value.contains("i"));
let p1 = engine.evaluate_script("(()=>{ if (typeof _persist === 'undefined') _persist=0; _persist++; return _persist; })()").expect("Eval failed");
assert!(p1.value.contains("1"));
let p2 = engine
.evaluate_script("(()=>{ return _persist; })()")
.expect("Eval failed");
assert!(p2.value.contains("1"));
let fired = engine.evaluate_script("(()=>{ if (typeof window.__test_fired === 'undefined') window.__test_fired = 0; setTimeout(function(){ window.__test_fired++; }, 100); __rfox_tick(200); __rfox_run_until_idle(); return (typeof window.__test_fired === 'undefined') ? 0 : window.__test_fired; })()").expect("Eval failed");
println!("fired -> {}", fired.value);
assert!(fired.value.contains("1"));
let url2 = format!("http://{}", addr);
engine.load_url(&url2).expect("Failed to load URL");
let res_after_nav = engine
.evaluate_script(
"(()=>{ return (typeof _persist === 'undefined') ? 'undef' : _persist; })()",
)
.expect("Eval failed");
assert!(!res_after_nav.value.contains("1"));
let order = engine.evaluate_script("(()=>{ var out=[]; queueMicrotask(function(){ out.push('p'); }); setTimeout(function(){ out.push('t'); }, 0); __rfox_run_until_idle(); return out.join(','); })()").expect("Eval failed");
let ord = order.value.replace("\n", "").replace("\"", "");
println!("ord -> {}", ord);
assert!(ord.contains("p") && ord.contains("t"));
let snap = engine.snapshot_page_context().expect("Snapshot failed");
assert!(!snap.is_empty() && snap.contains("dom"));
let _set = engine
.evaluate_script("(()=>{ window._ab = 42; return _ab; })()")
.expect("set failed");
let r1 = engine
.evaluate_script("(()=>{ return (typeof _ab === 'undefined') ? 'undef' : _ab; })()")
.expect("read failed");
assert!(r1.value.contains("42"));
engine.abort_running_script().expect("abort failed");
let r2 = engine
.evaluate_script("(()=>{ return (typeof _ab === 'undefined') ? 'undef' : _ab; })()")
.expect("read after abort failed");
assert!(r2.value.contains("undef"));
if engine.config.use_process_worker {
let _ = engine
.evaluate_script("(()=>{ window._proc = 7; return _proc; })()")
.expect("set failed");
let eng_arc = std::sync::Arc::new(std::sync::Mutex::new(engine));
let eng_clone = eng_arc.clone();
let handle = std::thread::spawn(move || {
let mut e = eng_clone.lock().unwrap();
e.evaluate_script("(()=>{ while(true){} })() ")
});
std::thread::sleep(std::time::Duration::from_millis(50));
{
let mut e = eng_arc.lock().unwrap();
let _ = e.abort_running_script();
}
let _ = handle.join();
let mut e = eng_arc.lock().unwrap();
let r3 = e
.evaluate_script(
"(()=>{ return (typeof _proc === 'undefined') ? 'undef' : _proc; })()",
)
.expect("read after abort failed");
assert!(r3.value.contains("undef"));
}
}
}
#[test]
fn test_selector_combinators_and_attributes() {
if std::env::var("CI").is_ok() {
return;
}
let server = tiny_http::Server::http("0.0.0.0:0").unwrap();
let addr = server.server_addr();
std::thread::spawn(move || {
if let Ok(request) = server.recv() {
let response = tiny_http::Response::from_string(
"<html><head><title>S</title></head><body><div id=\"outer\"><div class=\"mid\"><span class=\"inner\" data-test=\"x\">X</span></div></div></body></html>",
);
let _ = request.respond(response);
}
});
let url = format!("http://{}", addr);
let mut engine =
RFEngine::new(crate::EngineConfig::default()).expect("Failed to create RFEngine");
engine.load_url(&url).expect("Failed to load URL");
if engine.config.enable_javascript {
let res = engine
.evaluate_script(
"(()=>{ return querySelector('div span').getAttribute('data-test'); })()",
)
.expect("Eval failed");
assert!(res.value.contains("x"));
let res2 = engine.evaluate_script("(()=>{ return querySelector('div#outer > span').getAttribute('data-test'); })()").expect("Eval failed");
assert!(res2.value.contains("null") || res2.value.contains("undefined"));
let dom_dump = engine
.evaluate_script("JSON.stringify(__rfox_dom)")
.expect("DOM dump failed");
assert!(dom_dump.value.contains("\"data-test\"") && dom_dump.value.contains("\"x\""));
let html = "<html><body><div id=\"p\"><span data-a=\"one two\">X</span><span data-a=\"two\">Y</span><span data-a=\"pre-suf\">Z</span></div></body></html>";
let server2 = tiny_http::Server::http("0.0.0.0:0").unwrap();
let addr2 = server2.server_addr();
let html_clone = html.to_string();
std::thread::spawn(move || {
if let Ok(request) = server2.recv() {
let response = tiny_http::Response::from_string(html_clone);
let _ = request.respond(response);
}
});
let url2 = format!("http://{}", addr2);
engine.load_url(&url2).expect("Failed to load URL");
let r1 = engine.evaluate_script("(()=>{ for (var i=0;i<__rfox_dom.length;i++){ var el=__rfox_dom[i]; for (var j=0;j<el.attributes.length;j++){ if (el.attributes[j][0]==='data-a'){ var v=el.attributes[j][1]; if (v.indexOf('two')!==-1) { return el.text; } } } } return null; })()").expect("Eval failed");
assert!(r1.value.contains("Y") || r1.value.contains("X"));
let r2 = engine.evaluate_script("(()=>{ for (var i=0;i<__rfox_dom.length;i++){ var el=__rfox_dom[i]; for (var j=0;j<el.attributes.length;j++){ if (el.attributes[j][0]==='data-a'){ var v=el.attributes[j][1]; if (v.indexOf('pre')===0) return el.text; } } } return null; })()").expect("Eval failed");
assert!(r2.value.contains("Z"));
let r3 = engine.evaluate_script("(()=>{ for (var i=0;i<__rfox_dom.length;i++){ var el=__rfox_dom[i]; for (var j=0;j<el.attributes.length;j++){ if (el.attributes[j][0]==='data-a'){ var v=el.attributes[j][1]; if (v.length >= 3 && v.slice(v.length-3) === 'two') return el.text; } } } return null; })()").expect("Eval failed");
assert!(r3.value.contains("Y") || r3.value.contains("X"));
let r4 = engine.evaluate_script("(()=>{ for (var i=0;i<__rfox_dom.length;i++){ var el=__rfox_dom[i]; for (var j=0;j<el.attributes.length;j++){ if (el.attributes[j][0]==='data-a'){ var v=el.attributes[j][1]; if (v === 'pre' || v.indexOf('pre-')===0) return el.text; } } } return null; })()").expect("Eval failed");
assert!(r4.value.contains("Z"));
let r5 = engine
.evaluate_script(
"(()=>{ return querySelector('#p span:first-child').textContent(); })()",
)
.expect("Eval failed");
assert!(r5.value.contains("X"));
let r6 = engine
.evaluate_script(
"(()=>{ return querySelector('#p span:last-child').textContent(); })()",
)
.expect("Eval failed");
assert!(r6.value.contains("Z"));
}
}
#[test]
fn test_process_worker_abort() {
if std::env::var("CI").is_ok() {
return;
}
let server = tiny_http::Server::http("0.0.0.0:0").unwrap();
let addr = server.server_addr();
std::thread::spawn(move || {
if let Ok(request) = server.recv() {
let response = tiny_http::Response::from_string(
"<html><head><title>P</title></head><body><div id=\"x\">X</div></body></html>",
);
let _ = request.respond(response);
}
});
let url = format!("http://{}", addr);
let cfg = crate::EngineConfig {
enable_javascript: true,
use_process_worker: true,
..Default::default()
};
let mut engine = RFEngine::new(cfg).expect("Failed to create RFEngine");
engine.load_url(&url).expect("Failed to load URL");
let set_res = engine
.evaluate_script("(()=>{ window._proc = 7; return _proc; })()")
.expect("set failed");
if !set_res.value.contains("7") {
eprintln!(
"Skipping process-backed worker abort test; worker failed to start: {}",
set_res.value
);
return;
}
let eng_arc = std::sync::Arc::new(std::sync::Mutex::new(engine));
let eng_clone = eng_arc.clone();
let handle = std::thread::spawn(move || {
let mut e = eng_clone.lock().unwrap();
let _ = e.evaluate_script("(()=>{ while(true){} })() ");
});
std::thread::sleep(std::time::Duration::from_millis(50));
{
let mut e = eng_arc.lock().unwrap();
let _ = e.abort_running_script();
}
let _ = handle.join();
let mut e = eng_arc.lock().unwrap();
let r3 = e
.evaluate_script("(()=>{ return (typeof _proc === 'undefined') ? 'undef' : _proc; })()")
.expect("read after abort failed");
assert!(r3.value.contains("undef"));
}
#[cfg(feature = "cdp")]
#[test]
#[ignore]
fn test_compare_with_chrome() {
if std::env::var("RUN_CHROMIUM_COMPARISONS").is_err() {
return;
}
if std::env::var("CI").is_ok() {
return;
}
use crate::cdp::CdpEngine;
let server = tiny_http::Server::http("0.0.0.0:0").unwrap();
let addr = server.server_addr();
std::thread::spawn(move || {
if let Ok(request) = server.recv() {
let response = tiny_http::Response::from_string(
"<html><head><title>RF</title><style>body{color:blue}.greeting{color:green}#hello{color:red;font-size:12px}</style></head><body><div id=\"hello\" class=\"greeting\">Hello RF</div></body></html>",
);
let _ = request.respond(response);
}
});
let url = format!("http://{}", addr);
let mut rf =
RFEngine::new(crate::EngineConfig::default()).expect("Failed to create RFEngine");
rf.load_url(&url).expect("Failed to load URL");
let mut c = match CdpEngine::new(crate::EngineConfig::default()) {
Ok(c) => c,
Err(e) => {
eprintln!("Skipping Chrome comparison; failed to start Chrome: {}", e);
return;
}
};
c.load_url(&url).expect("Chrome failed to load URL");
let rf_res = rf.evaluate_script("(()=>{ return getComputedStyle(document.querySelector('#hello')).getPropertyValue('color'); })()").expect("RF eval failed");
let c_res = c.evaluate_script_in_page("(()=>{ return getComputedStyle(document.querySelector('#hello')).getPropertyValue('color'); })()").expect("Chrome eval failed");
let rf_norm = rf_res
.value
.to_lowercase()
.replace('"', "")
.trim()
.to_string();
let c_norm = c_res
.value
.to_lowercase()
.replace('"', "")
.trim()
.to_string();
assert!(
rf_norm == c_norm,
"Computed styles diverged: rf='{}' chrome='{}'",
rf_norm,
c_norm
);
}
}