use vs_protocol::{Ref, StateToken, Tree};
use vs_store::{ActionFilter, AnnotationTarget};
use super::audit::AuditCtx;
use super::responses::{AnnotateResponse, ExtractResponse, LogResponse, MarkResponse};
use super::{short_id, Daemon};
use crate::error::{DaemonError, Result};
use crate::tokens;
// Request handlers on `Daemon` for extraction, marks, annotations and the action log.
// Every handler builds an `AuditCtx` and runs its body through `audit_call`.
impl Daemon {
/// Extracts structured records from a page according to `schema`.
///
/// Supported schemas are `"table"` and `"list"` (computed from the page's cached
/// tree) and `"form"`, `"jsonld"` and `"webmcp"` (computed by `extract_via_engine`).
///
/// # Errors
///
/// - Any error returned by `current_token`.
/// - `DaemonError::StaleToken` when the page's current token differs from `before_token`.
/// - `DaemonError::UnknownSession` / `DaemonError::UnknownPage` when the ids do not resolve.
/// - `DaemonError::BadRequest` when the page has no cached tree. This check runs before
///   the schema is inspected, so it applies to the engine-backed schemas as well.
/// - `DaemonError::BadRequest` for an unrecognised schema.
/// - Any error returned by `extract_via_engine`.
///
/// # Panics
///
/// Panics with the message "poisoned" if locking the sessions map returns an error.
pub fn extract(
&self,
session_id: &str,
page_id: &str,
schema: &str,
before_token: StateToken,
) -> Result<ExtractResponse> {
// The audit context carries the schema both as the readable argument string and
// as the single element fed to the argument hash.
let ctx = AuditCtx::new("vs_extract", session_id)
.with_page(page_id)
.with_args(
schema.to_string(),
tokens::args_hash("vs_extract", &[schema.to_string()]),
)
.with_before(before_token);
self.audit_call(ctx, |ctx| {
// Reject the call when the caller's token does not match the page's current one.
let current = self.current_token(session_id, page_id)?;
if current != before_token {
return Err(DaemonError::StaleToken {
current,
reason: "mutate",
});
}
// The after-token recorded on the audit context equals the token just validated.
ctx.after_token = Some(current);
let sessions = self.inner.sessions.lock().expect("poisoned");
let page = sessions
.get(session_id)
.ok_or_else(|| DaemonError::UnknownSession(session_id.to_string()))?
.pages
.get(page_id)
.ok_or_else(|| DaemonError::UnknownPage(page_id.to_string()))?;
let tree = page.last_tree.as_ref().ok_or_else(|| {
DaemonError::BadRequest("no tree cached; call vs_view first".into())
})?;
let records = match schema {
"table" => extract_tables(tree),
"list" => extract_lists(tree),
"form" | "jsonld" | "webmcp" => {
// Copy the handle out and release the sessions guard before calling into
// the engine, so the lock is not held while the script is evaluated.
let engine_handle = page.engine_handle;
drop(sessions);
extract_via_engine(&self.inner.engine, engine_handle, schema)?
}
other => {
return Err(DaemonError::BadRequest(format!("unknown schema: {other}")));
}
};
Ok(ExtractResponse {
token: current,
records,
})
})
}
/// Creates a named mark for the node referenced by `r` on the given page.
///
/// The stored mark records a path of the form `<role>#<ref id>`, the node's role and
/// the node's label (passed to the store as the excerpt). The mark id is `m_`
/// followed by a value from `short_id()`.
///
/// # Errors
///
/// - Any error returned by `current_token`.
/// - `DaemonError::StaleToken` when the page's current token differs from `before_token`.
/// - `DaemonError::UnknownSession` / `DaemonError::UnknownPage` when the ids do not resolve.
/// - `DaemonError::UnknownRef` when `find_node` does not return a node for `r`.
/// - Any error returned by the store's `create_mark`.
///
/// # Panics
///
/// Panics with the message "poisoned" if locking the sessions map or the store
/// returns an error.
pub fn mark(
&self,
session_id: &str,
page_id: &str,
r: Ref,
name: &str,
before_token: StateToken,
) -> Result<MarkResponse> {
let args = vec![r.to_string(), name.to_string()];
let ctx = AuditCtx::new("vs_mark", session_id)
.with_page(page_id)
.with_args(format!("{r} {name}"), tokens::args_hash("vs_mark", &args))
.with_before(before_token);
self.audit_call(ctx, |ctx| {
let current = self.current_token(session_id, page_id)?;
if current != before_token {
return Err(DaemonError::StaleToken {
current,
reason: "mutate",
});
}
ctx.after_token = Some(current);
// The block scope ends the sessions guard before the store lock is taken below,
// so the two locks are never held at the same time.
let (dom_path, role, excerpt) = {
let sessions = self.inner.sessions.lock().expect("poisoned");
let page = sessions
.get(session_id)
.ok_or_else(|| DaemonError::UnknownSession(session_id.to_string()))?
.pages
.get(page_id)
.ok_or_else(|| DaemonError::UnknownPage(page_id.to_string()))?;
let node = page.find_node(r).ok_or(DaemonError::UnknownRef(r.0))?;
(
format!("{}#{}", node.role, r.0),
Some(node.role.to_string()),
Some(node.label.clone()),
)
};
let mark_id = format!("m_{}", short_id());
let mut store = self.inner.store.lock().expect("poisoned");
store.create_mark(
&mark_id,
session_id,
page_id,
name,
&dom_path,
role.as_deref(),
excerpt.as_deref(),
)?;
Ok(MarkResponse {
mark_id,
token: current,
})
})
}
/// Adds a key/value annotation to `target` and returns the id of the stored row.
///
/// The audit arguments are built from the target (rendered as `ref:<r>`,
/// `mark:<name>` or `page`) and `key`; `value` is not part of them.
///
/// # Errors
///
/// - Any error returned by `require_session`.
/// - Any error returned by the store's `add_annotation`.
///
/// # Panics
///
/// Panics with the message "poisoned" if locking the store returns an error.
pub fn annotate(
&self,
session_id: &str,
target: &AnnotationTarget,
key: &str,
value: Option<&str>,
) -> Result<AnnotateResponse> {
let target_str = match target {
AnnotationTarget::Ref(r) => format!("ref:{r}"),
AnnotationTarget::Mark(name) => format!("mark:{name}"),
AnnotationTarget::Page => "page".to_string(),
};
let args = vec![target_str.clone(), key.to_string()];
let ctx = AuditCtx::new("vs_annotate", session_id).with_args(
format!("{target_str} {key}"),
tokens::args_hash("vs_annotate", &args),
);
self.audit_call(ctx, |_ctx| {
self.require_session(session_id)?;
// A fresh `an_`-prefixed id is proposed to the store; the response reports the
// id carried by the row the store returns.
let id = format!("an_{}", short_id());
let mut store = self.inner.store.lock().expect("poisoned");
let row = store.add_annotation(&id, target, key, value)?;
Ok(AnnotateResponse { id: row.id })
})
}
/// Lists recorded actions for a session, optionally narrowed by the given filters.
///
/// The filter is always scoped to `session_id`; `page_id`, `group_label`,
/// `since_started_at` and `limit` are forwarded to the store unchanged.
///
/// # Errors
///
/// - Any error returned by `require_session`.
/// - Any error returned by the store's `list_actions`.
///
/// # Panics
///
/// Panics with the message "poisoned" if locking the store returns an error.
pub fn log(
&self,
session_id: &str,
page_id: Option<String>,
group_label: Option<String>,
since_started_at: Option<i64>,
limit: Option<i64>,
) -> Result<LogResponse> {
// The audit context gets an empty argument string and a hash over an empty
// argument list; the filter values are not included in it.
let ctx = AuditCtx::new("vs_log", session_id)
.with_args(String::new(), tokens::args_hash("vs_log", &[]));
self.audit_call(ctx, |_ctx| {
self.require_session(session_id)?;
let filter = ActionFilter {
session_id: Some(session_id.to_string()),
page_id,
group_label,
since_started_at,
limit,
};
let store = self.inner.store.lock().expect("poisoned");
let rows = store.list_actions(&filter)?;
Ok(LogResponse { rows })
})
}
}
fn extract_tables(tree: &Tree) -> Vec<Vec<String>> {
fn collect_rows(node: &vs_protocol::Node, out: &mut Vec<Vec<String>>) {
if matches!(node.role, vs_protocol::Role::Row) {
let cells: Vec<String> = collect_cells(node);
if !cells.is_empty() {
out.push(cells);
}
return;
}
for c in &node.children {
collect_rows(c, out);
}
}
fn collect_cells(node: &vs_protocol::Node) -> Vec<String> {
let mut acc = Vec::new();
for c in &node.children {
if matches!(c.role, vs_protocol::Role::Cell | vs_protocol::Role::Hdr) {
acc.push(c.label.clone());
} else {
acc.extend(collect_cells(c));
}
}
acc
}
let mut out = Vec::new();
for node in tree {
if matches!(node.role, vs_protocol::Role::Tbl) {
for child in &node.children {
collect_rows(child, &mut out);
}
}
}
out
}
fn extract_lists(tree: &Tree) -> Vec<Vec<String>> {
fn collect_items(node: &vs_protocol::Node, out: &mut Vec<Vec<String>>) {
if matches!(node.role, vs_protocol::Role::Itm | vs_protocol::Role::Li) {
out.push(vec![node.role.to_string(), node.label.clone()]);
return;
}
for c in &node.children {
collect_items(c, out);
}
}
let mut out = Vec::new();
for node in tree {
if matches!(node.role, vs_protocol::Role::Lst) {
for child in &node.children {
collect_items(child, &mut out);
}
}
}
out
}
fn extract_via_engine(
engine: &vs_engine_webkit::EngineRuntime,
handle: vs_engine_webkit::PageHandle,
schema: &str,
) -> Result<Vec<Vec<String>>> {
use vs_engine_webkit::inspector::EvalResult;
let js = match schema {
"form" => {
r"(function() {
var out = [];
for (var i = 0; i < document.forms.length; i++) {
var f = document.forms[i];
for (var j = 0; j < f.elements.length; j++) {
var el = f.elements[j];
if (!el.name && !el.id) continue;
out.push([
f.id || ('form_' + i),
el.name || el.id,
el.type || el.tagName.toLowerCase(),
el.value || '',
]);
}
}
return JSON.stringify(out);
})()"
}
"jsonld" => {
r#"(function() {
var nodes = document.querySelectorAll('script[type="application/ld+json"]');
var out = [];
for (var i = 0; i < nodes.length; i++) {
out.push(['jsonld', nodes[i].textContent || '']);
}
return JSON.stringify(out);
})()"#
}
"webmcp" => {
r#"(function() {
var nodes = document.querySelectorAll('script[type="application/x-webmcp"]');
var out = [];
for (var i = 0; i < nodes.length; i++) {
out.push(['webmcp', nodes[i].textContent || '']);
}
return JSON.stringify(out);
})()"#
}
_ => return Err(DaemonError::BadRequest(format!("unknown schema: {schema}"))),
};
let result = engine
.eval_js(handle, js)
.map_err(|e| DaemonError::BadRequest(format!("engine: {e}")))?;
let value = match result {
EvalResult::Ok { value, .. } => value,
EvalResult::Thrown { kind, message } => {
return Err(DaemonError::BadRequest(format!(
"extract {schema}: {kind}: {message}"
)));
}
EvalResult::Syntax { message } => {
return Err(DaemonError::BadRequest(format!(
"extract {schema}: syntax: {message}"
)));
}
};
let arr: serde_json::Value = serde_json::from_str(&value)
.map_err(|e| DaemonError::BadRequest(format!("extract {schema}: parse: {e}")))?;
let rows = arr.as_array().cloned().unwrap_or_default();
let mut out = Vec::with_capacity(rows.len());
for row in rows {
let cells = row
.as_array()
.map(|a| {
a.iter()
.map(|v: &serde_json::Value| {
v.as_str().map_or_else(|| v.to_string(), str::to_string)
})
.collect::<Vec<_>>()
})
.unwrap_or_default();
out.push(cells);
}
Ok(out)
}