1use vs_protocol::{Ref, StateToken, Tree};
6use vs_store::{ActionFilter, AnnotationTarget};
7
8use super::audit::AuditCtx;
9use super::responses::{AnnotateResponse, ExtractResponse, LogResponse, MarkResponse};
10use super::{short_id, Daemon};
11use crate::error::{DaemonError, Result};
12use crate::tokens;
13
14impl Daemon {
15 pub fn extract(
17 &self,
18 session_id: &str,
19 page_id: &str,
20 schema: &str,
21 before_token: StateToken,
22 ) -> Result<ExtractResponse> {
23 let ctx = AuditCtx::new("vs_extract", session_id)
24 .with_page(page_id)
25 .with_args(
26 schema.to_string(),
27 tokens::args_hash("vs_extract", &[schema.to_string()]),
28 )
29 .with_before(before_token);
30 self.audit_call(ctx, |ctx| {
31 let current = self.current_token(session_id, page_id)?;
32 if current != before_token {
33 return Err(DaemonError::StaleToken {
34 current,
35 reason: "mutate",
36 });
37 }
38 ctx.after_token = Some(current);
39
40 let sessions = self.inner.sessions.lock().expect("poisoned");
41 let page = sessions
42 .get(session_id)
43 .ok_or_else(|| DaemonError::UnknownSession(session_id.to_string()))?
44 .pages
45 .get(page_id)
46 .ok_or_else(|| DaemonError::UnknownPage(page_id.to_string()))?;
47 let tree = page.last_tree.as_ref().ok_or_else(|| {
48 DaemonError::BadRequest("no tree cached; call vs_view first".into())
49 })?;
50 let records = match schema {
51 "table" => extract_tables(tree),
52 "list" => extract_lists(tree),
53 "form" | "jsonld" | "webmcp" => {
54 let engine_handle = page.engine_handle;
60 drop(sessions);
61 extract_via_engine(&self.inner.engine, engine_handle, schema)?
62 }
63 other => {
64 return Err(DaemonError::BadRequest(format!("unknown schema: {other}")));
65 }
66 };
67 Ok(ExtractResponse {
68 token: current,
69 records,
70 })
71 })
72 }
73
74 pub fn mark(
76 &self,
77 session_id: &str,
78 page_id: &str,
79 r: Ref,
80 name: &str,
81 before_token: StateToken,
82 ) -> Result<MarkResponse> {
83 let args = vec![r.to_string(), name.to_string()];
84 let ctx = AuditCtx::new("vs_mark", session_id)
85 .with_page(page_id)
86 .with_args(format!("{r} {name}"), tokens::args_hash("vs_mark", &args))
87 .with_before(before_token);
88 self.audit_call(ctx, |ctx| {
89 let current = self.current_token(session_id, page_id)?;
90 if current != before_token {
91 return Err(DaemonError::StaleToken {
92 current,
93 reason: "mutate",
94 });
95 }
96 ctx.after_token = Some(current);
97
98 let (dom_path, role, excerpt) = {
99 let sessions = self.inner.sessions.lock().expect("poisoned");
100 let page = sessions
101 .get(session_id)
102 .ok_or_else(|| DaemonError::UnknownSession(session_id.to_string()))?
103 .pages
104 .get(page_id)
105 .ok_or_else(|| DaemonError::UnknownPage(page_id.to_string()))?;
106 let node = page.find_node(r).ok_or(DaemonError::UnknownRef(r.0))?;
107 (
108 format!("{}#{}", node.role, r.0),
109 Some(node.role.to_string()),
110 Some(node.label.clone()),
111 )
112 };
113
114 let mark_id = format!("m_{}", short_id());
115 let mut store = self.inner.store.lock().expect("poisoned");
116 store.create_mark(
117 &mark_id,
118 session_id,
119 page_id,
120 name,
121 &dom_path,
122 role.as_deref(),
123 excerpt.as_deref(),
124 )?;
125 Ok(MarkResponse {
126 mark_id,
127 token: current,
128 })
129 })
130 }
131
132 pub fn annotate(
134 &self,
135 session_id: &str,
136 target: &AnnotationTarget,
137 key: &str,
138 value: Option<&str>,
139 ) -> Result<AnnotateResponse> {
140 let target_str = match target {
141 AnnotationTarget::Ref(r) => format!("ref:{r}"),
142 AnnotationTarget::Mark(name) => format!("mark:{name}"),
143 AnnotationTarget::Page => "page".to_string(),
144 };
145 let args = vec![target_str.clone(), key.to_string()];
146 let ctx = AuditCtx::new("vs_annotate", session_id).with_args(
147 format!("{target_str} {key}"),
148 tokens::args_hash("vs_annotate", &args),
149 );
150 self.audit_call(ctx, |_ctx| {
151 self.require_session(session_id)?;
152 let id = format!("an_{}", short_id());
153 let mut store = self.inner.store.lock().expect("poisoned");
154 let row = store.add_annotation(&id, target, key, value)?;
155 Ok(AnnotateResponse { id: row.id })
156 })
157 }
158
159 pub fn log(
161 &self,
162 session_id: &str,
163 page_id: Option<String>,
164 group_label: Option<String>,
165 since_started_at: Option<i64>,
166 limit: Option<i64>,
167 ) -> Result<LogResponse> {
168 let ctx = AuditCtx::new("vs_log", session_id)
169 .with_args(String::new(), tokens::args_hash("vs_log", &[]));
170 self.audit_call(ctx, |_ctx| {
171 self.require_session(session_id)?;
172 let filter = ActionFilter {
173 session_id: Some(session_id.to_string()),
174 page_id,
175 group_label,
176 since_started_at,
177 limit,
178 };
179 let store = self.inner.store.lock().expect("poisoned");
180 let rows = store.list_actions(&filter)?;
181 Ok(LogResponse { rows })
182 })
183 }
184}
185
186fn extract_tables(tree: &Tree) -> Vec<Vec<String>> {
191 fn collect_rows(node: &vs_protocol::Node, out: &mut Vec<Vec<String>>) {
192 if matches!(node.role, vs_protocol::Role::Row) {
193 let cells: Vec<String> = collect_cells(node);
194 if !cells.is_empty() {
195 out.push(cells);
196 }
197 return;
198 }
199 for c in &node.children {
200 collect_rows(c, out);
201 }
202 }
203 fn collect_cells(node: &vs_protocol::Node) -> Vec<String> {
204 let mut acc = Vec::new();
205 for c in &node.children {
206 if matches!(c.role, vs_protocol::Role::Cell | vs_protocol::Role::Hdr) {
207 acc.push(c.label.clone());
208 } else {
209 acc.extend(collect_cells(c));
211 }
212 }
213 acc
214 }
215 let mut out = Vec::new();
216 for node in tree {
217 if matches!(node.role, vs_protocol::Role::Tbl) {
218 for child in &node.children {
219 collect_rows(child, &mut out);
220 }
221 }
222 }
223 out
224}
225
226fn extract_lists(tree: &Tree) -> Vec<Vec<String>> {
228 fn collect_items(node: &vs_protocol::Node, out: &mut Vec<Vec<String>>) {
229 if matches!(node.role, vs_protocol::Role::Itm | vs_protocol::Role::Li) {
230 out.push(vec![node.role.to_string(), node.label.clone()]);
231 return;
232 }
233 for c in &node.children {
234 collect_items(c, out);
235 }
236 }
237 let mut out = Vec::new();
238 for node in tree {
239 if matches!(node.role, vs_protocol::Role::Lst) {
240 for child in &node.children {
241 collect_items(child, &mut out);
242 }
243 }
244 }
245 out
246}
247
248fn extract_via_engine(
253 engine: &vs_engine_webkit::EngineRuntime,
254 handle: vs_engine_webkit::PageHandle,
255 schema: &str,
256) -> Result<Vec<Vec<String>>> {
257 use vs_engine_webkit::inspector::EvalResult;
258 let js = match schema {
259 "form" => {
260 r"(function() {
261 var out = [];
262 for (var i = 0; i < document.forms.length; i++) {
263 var f = document.forms[i];
264 for (var j = 0; j < f.elements.length; j++) {
265 var el = f.elements[j];
266 if (!el.name && !el.id) continue;
267 out.push([
268 f.id || ('form_' + i),
269 el.name || el.id,
270 el.type || el.tagName.toLowerCase(),
271 el.value || '',
272 ]);
273 }
274 }
275 return JSON.stringify(out);
276 })()"
277 }
278 "jsonld" => {
279 r#"(function() {
280 var nodes = document.querySelectorAll('script[type="application/ld+json"]');
281 var out = [];
282 for (var i = 0; i < nodes.length; i++) {
283 out.push(['jsonld', nodes[i].textContent || '']);
284 }
285 return JSON.stringify(out);
286 })()"#
287 }
288 "webmcp" => {
289 r#"(function() {
290 var nodes = document.querySelectorAll('script[type="application/x-webmcp"]');
291 var out = [];
292 for (var i = 0; i < nodes.length; i++) {
293 out.push(['webmcp', nodes[i].textContent || '']);
294 }
295 return JSON.stringify(out);
296 })()"#
297 }
298 _ => return Err(DaemonError::BadRequest(format!("unknown schema: {schema}"))),
299 };
300 let result = engine
301 .eval_js(handle, js)
302 .map_err(|e| DaemonError::BadRequest(format!("engine: {e}")))?;
303 let value = match result {
304 EvalResult::Ok { value, .. } => value,
305 EvalResult::Thrown { kind, message } => {
306 return Err(DaemonError::BadRequest(format!(
307 "extract {schema}: {kind}: {message}"
308 )));
309 }
310 EvalResult::Syntax { message } => {
311 return Err(DaemonError::BadRequest(format!(
312 "extract {schema}: syntax: {message}"
313 )));
314 }
315 };
316 let arr: serde_json::Value = serde_json::from_str(&value)
319 .map_err(|e| DaemonError::BadRequest(format!("extract {schema}: parse: {e}")))?;
320 let rows = arr.as_array().cloned().unwrap_or_default();
321 let mut out = Vec::with_capacity(rows.len());
322 for row in rows {
323 let cells = row
324 .as_array()
325 .map(|a| {
326 a.iter()
327 .map(|v: &serde_json::Value| {
328 v.as_str().map_or_else(|| v.to_string(), str::to_string)
329 })
330 .collect::<Vec<_>>()
331 })
332 .unwrap_or_default();
333 out.push(cells);
334 }
335 Ok(out)
336}