package rsclaw:plugin;
/// Browser automation surface offered by the host. Every function in this
/// interface returns `result<string, string>`; the success payload is only
/// specified where a comment below spells it out.
interface host-browser {
    /// Open `url` in the host-driven browser.
    /// NOTE(review): success payload is not specified in this file.
    browser-open: func(url: string) -> result<string, string>;

    /// Snapshot the current page. Per the `wait-for-selector` notes, a
    /// snapshot is what yields clickable refs.
    /// NOTE(review): snapshot format is not specified in this file.
    browser-snapshot: func() -> result<string, string>;

    /// Click the element identified by `ref-str`.
    /// NOTE(review): presumably a ref taken from `browser-snapshot` and a
    /// synthetic JS click (see `browser-click-at`) — confirm with the host.
    browser-click: func(ref-str: string) -> result<string, string>;

    /// Native, CDP-level mouse click at viewport coordinates (x, y). Prefer
    /// this when the page ignores synthetic JS click events, which is
    /// typical for React PointerEvent handlers on popups / paywalls.
    browser-click-at: func(x: u32, y: u32) -> result<string, string>;

    /// Fill `text` into the element identified by `ref-str`.
    /// NOTE(review): whether existing content is cleared is not stated here.
    browser-fill: func(ref-str: string, text: string) -> result<string, string>;

    /// Press the key named `key`.
    /// NOTE(review): accepted key names are not listed in this file.
    browser-press: func(key: string) -> result<string, string>;

    /// Evaluate `code` in the browser. Passing data in requires string
    /// interpolation and manual escaping; see `eval-with-args` for the
    /// structured alternative.
    browser-eval: func(code: string) -> result<string, string>;

    /// Wait for `text`, bounded by `timeout-ms`.
    /// NOTE(review): presumably waits for the text to appear on the page and
    /// the timeout is in milliseconds — confirm with the host.
    browser-wait-text: func(text: string, timeout-ms: u32) -> result<string, string>;

    /// Capture a screenshot. The `notify-with-image` notes in `host-runtime`
    /// state that this returns a `data:image/<format>;base64,<...>` string.
    browser-screenshot: func() -> result<string, string>;

    /// Download via the element identified by `ref-str`, using `filename`.
    /// NOTE(review): destination directory and return value are not
    /// specified in this file.
    browser-download: func(ref-str: string, filename: string) -> result<string, string>;

    /// Upload the file at `filepath` to the input identified by `ref-str`.
    /// The `browser-upload-via-chooser` notes indicate the selector is
    /// resolved in the main document only.
    browser-upload: func(ref-str: string, filepath: string) -> result<string, string>;

    /// Upload SEVERAL files in one go to a single multiple-capable file
    /// input identified by data-ref. This mirrors picking several files in
    /// the OS dialog: input.files is set to all paths and change fires once.
    /// Intended for galleries / multi-image uploaders backed by one
    /// `<input multiple>`.
    browser-upload-multi: func(ref-str: string, filepaths: list<string>) -> result<string, string>;

    /// Upload one or more files to an input that is opened through a native
    /// file-chooser dialog (e.g. a button triggering a transient or
    /// cross-origin-iframe `<input type=file>`). The chooser is intercepted
    /// at the CDP level, the trigger at (click-x, click-y) is clicked, and
    /// all files are then set at once. Works across iframes/origins where
    /// `browser-upload`'s main-document selector cannot.
    browser-upload-via-chooser: func(
        filepaths: list<string>,
        click-x: u32,
        click-y: u32
    ) -> result<string, string>;

    /// Return the current URL.
    /// NOTE(review): presumably the URL of the active tab — confirm.
    browser-get-url: func() -> result<string, string>;

    /// Wait until an element matching `css-selector` is present in the DOM.
    /// Polls via JS every 250ms until a match or the timeout. Returns "ok"
    /// on match and an error on timeout. Call `browser-snapshot` afterwards
    /// if a clickable ref is needed.
    wait-for-selector: func(css-selector: string, timeout-ms: u32) -> result<string, string>;

    /// Wait until network requests have been quiet for ~500ms, or until
    /// `timeout-ms` has elapsed in total. Returns "ok", or an error on
    /// timeout.
    wait-for-network-idle: func(timeout-ms: u32) -> result<string, string>;

    /// Run a JavaScript function expression with structured arguments.
    /// `code` MUST evaluate to a function (e.g. `"async (args) => { ... }"`
    /// or `"function(args) { ... }"`). `args-json` is parsed and handed to
    /// the function as its first argument; the return value is
    /// JSON-stringified and returned. This avoids the brittle
    /// string-interpolation and manual escaping `browser-eval` requires.
    eval-with-args: func(code: string, args-json: string) -> result<string, string>;

    /// Make the most recently opened tab the active browser tab. Handy when
    /// an action opens a popup window. Supersedes the
    /// `browser-eval("__switch_latest_tab")` magic-string convention.
    switch-latest-tab: func() -> result<string, string>;
}
/// General runtime services: logging, sleeping, sandboxed file access,
/// per-plugin SQLite, user notifications and knowledge-base ingestion.
interface host-runtime {
    /// Emit log message `msg` at `level`.
    /// NOTE(review): accepted level names are not listed in this file.
    log: func(level: string, msg: string);

    /// Sleep for `ms`.
    /// NOTE(review): presumably milliseconds, going by the name — confirm.
    sleep: func(ms: u32);

    /// Read the file at `path`.
    /// NOTE(review): sandboxing and encoding rules for reads are not stated
    /// in this file.
    read-file: func(path: string) -> result<string, string>;

    /// Extract readable text from a saved plugin artifact. `path` is
    /// sandboxed to plugin-controlled artifact directories.
    extract-file-text: func(path: string) -> result<string, string>;

    /// Write `contents` to `path`. `path` is sandboxed to the plugin
    /// workspace or the var/plugins/<name>/ directory; writes elsewhere are
    /// rejected. Missing parent directories are created automatically.
    write-file: func(path: string, contents: string) -> result<string, string>;

    /// Make sure a directory and all of its parents exist (mkdir -p).
    /// `path` is sandboxed exactly like `write-file`.
    ensure-dir: func(path: string) -> result<string, string>;

    /// Run a SQL statement that returns no rows (CREATE, INSERT, UPDATE,
    /// DELETE, etc.). `params` are bound positionally (?1, ?2, ...) to
    /// prevent SQL injection. Returns JSON {"rows_affected": N,
    /// "last_insert_rowid": M}. Each plugin has its own isolated SQLite DB.
    sql-execute: func(sql: string, params: list<string>) -> result<string, string>;

    /// Run a SQL query that returns rows (SELECT). `params` are bound
    /// positionally. Returns a JSON array of objects, one per row.
    sql-query: func(sql: string, params: list<string>) -> result<string, string>;

    /// Send the user a progress/notification message during long operations.
    notify: func(message: string) -> result<string, string>;

    /// Send a message together with an inline image. `image-data-uri` must
    /// be a `data:image/<format>;base64,<...>` string (the
    /// browser-screenshot host fn already returns this shape). The host
    /// always fills the OutboundMessage's `images` field with the data URI;
    /// channel handlers (feishu, wechat, desktop, etc.) decide how to
    /// render it: IM channels upload it inline, desktop renders/saves it
    /// natively.
    notify-with-image: func(message: string, image-data-uri: string) -> result<string, string>;

    /// Send a message together with a file attachment given by absolute
    /// path. `mime` is e.g. "video/mp4" or "image/png". The file path must
    /// resolve under the plugin workspace (canonicalized & allowlisted by
    /// the host). The host fills OutboundMessage.files; channel handlers
    /// decide how to deliver it (IM channels upload, desktop surfaces the
    /// path).
    notify-with-file: func(
        message: string,
        file-path: string,
        mime: string
    ) -> result<string, string>;

    /// Ingest a prepared document into the local knowledge base. The
    /// collection is looked up by name and created if it does not exist.
    kb-ingest-document: func(
        collection: string,
        title: string,
        content: string,
        mime: string
    ) -> result<string, string>;
}
/// Access to the plugin's own configuration.
interface host-config {
    /// Return the resolved config JSON for this plugin. Simple
    /// `{source:"env",id:"VAR"}` secret references are substituted with the
    /// environment value before the guest ever sees the object.
    plugin-config: func() -> result<string, string>;
}
/// Access to the context of the current invocation.
interface host-context {
    /// Return the current invocation context as JSON. Fields that are empty
    /// indicate the plugin was invoked outside an agent/channel turn.
    current-context: func() -> result<string, string>;
}
/// Outbound HTTP performed by the host on the plugin's behalf.
interface host-http {
    /// Perform an HTTP request. `headers-json` is an object and `body` is
    /// raw UTF-8 text. The return value is JSON `{status, headers, body}`.
    /// NOTE(review): `timeout-ms` is presumably milliseconds — confirm.
    request: func(
        method: string,
        url: string,
        headers-json: string,
        body: string,
        timeout-ms: u32
    ) -> result<string, string>;
}
/// Plugin-scoped key/value store backed by the host.
/// NOTE(review): the success payloads and the behaviour of `kv-get` for a
/// missing key are not specified in this file.
interface host-kv {
    /// Look up the value stored under `key`.
    kv-get: func(key: string) -> result<string, string>;

    /// Store `value` under `key`.
    kv-set: func(key: string, value: string) -> result<string, string>;

    /// Delete the entry stored under `key`.
    kv-delete: func(key: string) -> result<string, string>;
}
/// Device identity functions; the private key stays with the host.
interface host-device {
    /// Stable, host-generated public key for this machine/profile.
    /// NOTE(review): key algorithm and encoding are not specified here.
    device-public-key: func() -> result<string, string>;

    /// Sign an arbitrary `payload` using the host-held private key.
    /// NOTE(review): signature encoding is not specified here.
    device-sign: func(payload: string) -> result<string, string>;
}
/// Background facilities: scheduled tasks, SSE subscriptions, outbound
/// pushes and synthetic agent turns.
/// NOTE(review): the JSON schemas of the `*-json` parameters are not
/// specified in this file.
interface host-background {
    /// Register or update a cron-style task for this plugin.
    cron-register: func(name: string, schedule-json: string) -> result<string, string>;

    /// Subscribe to an SSE stream.
    sse-subscribe: func(
        name: string,
        url: string,
        headers-json: string,
        resume-key: string
    ) -> result<string, string>;

    /// Inspect a plugin SSE subscription, scoped to the current invocation
    /// context.
    sse-status: func(name: string) -> result<string, string>;

    /// Push an outbound message to a concrete channel/peer.
    push-outbound: func(
        channel: string,
        peer-id: string,
        message-json: string
    ) -> result<string, string>;

    /// Submit a synthetic user turn to the agent queue.
    submit-agent-turn: func(
        session-key: string,
        prompt: string,
        route-json: string
    ) -> result<string, string>;
}
/// Artifact path allocation. The host owns the on-disk layout.
interface host-storage {
    /// Ask the host for a writable absolute path for a new artifact file.
    /// The host chooses the location (typically under its base-dir),
    /// creates the parent directory and returns a normalized absolute path.
    /// Plugins MUST call this rather than building filesystem paths
    /// themselves, because the host owns the on-disk layout.
    ///
    /// `filename` is only a HINT: the host reads its extension to choose a
    /// category (i/v/a/d/f), ignores the stem, and writes a canonical
    /// `dl_<kind>_<YYYYMMDDHHmm><ab>.<ext>` instead. Any short
    /// representative name such as `"video.mp4"` / `"image.png"` will do.
    allocate-artifact: func(filename: string) -> result<string, string>;

    /// Allocate a batch of related artifact paths that share one base
    /// (timestamp + 2-letter random suffix) and differ only in the `_N`
    /// index suffix. Use it when a single tool call yields several outputs
    /// of the same kind (e.g. a 4-image batch). Paths are returned in
    /// 1-based order.
    allocate-artifact-group: func(filename: string, count: u32) -> result<list<string>, string>;
}
/// Media helpers: audio extraction, speech-to-text and keyframe extraction.
interface host-media {
    /// Extract the audio track of a local video or audio file with ffmpeg,
    /// converting it to 16kHz mono WAV (compatible with most STT engines).
    /// The input may be video (mp4/webm/mov/...) or audio (mp3/wav/...).
    /// NOTE(review): presumably returns the path of the WAV file — confirm.
    extract-audio: func(input-path: string) -> result<string, string>;

    /// Transcribe audio to text with the host's configured STT engine.
    /// Provider priority: sherpa-onnx (local) → whisper.cpp → OpenAI API.
    /// `language` is a BCP-47 code (zh-CN, en-US, ja-JP, etc).
    transcribe: func(audio-path: string, language: string) -> result<string, string>;

    /// Extract keyframes from a local video file with ffmpeg, spreading
    /// `count` frames evenly over the video's duration. Returns a list of
    /// PNG image file paths.
    extract-keyframes: func(video-path: string, count: u32) -> result<list<string>, string>;
}
/// Android device automation: raw input, app lifecycle, UI-tree selectors
/// and a colour-based fallback for dialogs with no UI tree.
interface host-android {
    /// Tap the screen at an absolute coordinate.
    android-tap: func(x: u32, y: u32) -> result<string, string>;

    /// Swipe from (x1,y1) to (x2,y2), taking duration-ms milliseconds.
    android-swipe: func(
        x1: u32,
        y1: u32,
        x2: u32,
        y2: u32,
        duration-ms: u32
    ) -> result<string, string>;

    /// Type text into the element that currently has focus (equivalent to
    /// send_keys).
    android-type: func(text: string) -> result<string, string>;

    /// Set the text of the Android primary clipboard.
    android-clipboard-set: func(text: string) -> result<string, string>;

    /// Paste the Android primary clipboard text into the currently focused
    /// input.
    android-paste: func() -> result<string, string>;

    /// Press a hardware/nav key by name: back, home, enter.
    android-press: func(key: string) -> result<string, string>;

    /// Return the full UI hierarchy as XML. Pass compressed=false for
    /// WebView or Flutter content.
    android-get-ui-xml: func(compressed: bool) -> result<string, string>;

    /// Return the current foreground "package/ActivityName".
    android-current-activity: func() -> result<string, string>;

    /// Launch an app by package name and wait until it is in the foreground.
    android-launch-app: func(pkg: string) -> result<string, string>;

    /// Force-stop an app by package name.
    android-stop-app: func(pkg: string) -> result<string, string>;

    /// Capture a screenshot as a data:image/png;base64,... URI.
    android-screenshot: func() -> result<string, string>;

    /// Find elements by selector. Returns a JSON array of
    /// {text, resource-id, content-desc, bounds, clickable}.
    /// selector-type: "resource-id" | "text" | "text-contains" |
    /// "content-desc" | "content-desc-contains" | "class"
    android-find-elements: func(
        selector-type: string,
        selector-value: string
    ) -> result<string, string>;

    /// Tap the first element that matches the selector. Returns "tapped" or
    /// an error.
    android-tap-element: func(
        selector-type: string,
        selector-value: string
    ) -> result<string, string>;

    /// Return the text content of the first element matching the selector.
    android-get-element-text: func(
        selector-type: string,
        selector-value: string
    ) -> result<string, string>;

    /// Set the text of the first element matching the selector, clearing
    /// whatever content it already had.
    android-set-element-text: func(
        selector-type: string,
        selector-value: string,
        text: string
    ) -> result<string, string>;

    /// Report whether any element matching the selector is on screen now.
    android-element-exists: func(
        selector-type: string,
        selector-value: string
    ) -> result<bool, string>;

    /// Wait up to timeout-ms for an element matching the selector to
    /// appear. Returns "found" or an error.
    android-wait-for-element: func(
        selector-type: string,
        selector-value: string,
        timeout-ms: u32
    ) -> result<string, string>;

    /// Tap the brand-yellow primary button, located by its RENDERED COLOR.
    /// Flutter confirmation dialogs (delist / delete / cancel order / ship)
    /// expose no UI tree, so element selectors cannot find their
    /// "OK" / "Confirm" button — but it is always the bright-yellow action.
    /// Takes a screenshot, finds the largest yellow region inside the
    /// vertical band [y-min, y-max) (pass 0,0 for the whole screen) and
    /// taps its center. Returns "tapped:x,y", or an error when no yellow
    /// button is found.
    android-tap-yellow-button: func(y-min: u32, y-max: u32) -> result<string, string>;
}
/// iOS device automation through WebDriverAgent (WDA).
interface host-ios {
    /// Connect to WebDriverAgent and, optionally, launch an app by bundle
    /// id. On success the session id string is returned.
    ios-connect: func(bundle-id: option<string>) -> result<string, string>;

    /// Find elements by WDA selector. Returns a JSON array of
    /// [{text, label, type, enabled, rect:{x,y,w,h}}].
    /// NOTE(review): accepted `selector-type` values are not listed here.
    ios-find-elements: func(
        selector-type: string,
        selector-value: string
    ) -> result<string, string>;

    /// Tap the first element that matches the selector.
    ios-tap-element: func(
        selector-type: string,
        selector-value: string
    ) -> result<string, string>;

    /// Tap at absolute screen coordinates (points, not pixels).
    ios-tap: func(x: f64, y: f64) -> result<string, string>;

    /// Type text through the keyboard (UIKit path).
    ios-type: func(text: string) -> result<string, string>;

    /// Swipe from (x1,y1) to (x2,y2), taking duration-ms.
    ios-swipe: func(
        x1: f64,
        y1: f64,
        x2: f64,
        y2: f64,
        duration-ms: u32
    ) -> result<string, string>;

    /// Return compact screen element labels as a JSON array of strings.
    ios-get-labels: func() -> result<string, string>;

    /// Capture a screenshot as a data:image/png;base64,... URI.
    ios-screenshot: func() -> result<string, string>;

    /// Return the screen size in points as JSON {width, height}.
    ios-screen-size: func() -> result<string, string>;

    /// Press a system keyboard button by name: "Send", "Return", "back",
    /// "home".
    ios-press-button: func(name: string) -> result<string, string>;

    /// Return the bundle id of the current foreground app.
    ios-current-app: func() -> result<string, string>;

    /// Launch an app, or bring it to the foreground, by bundle id.
    ios-launch-app: func(bundle-id: string) -> result<string, string>;

    /// Terminate an app by bundle id.
    ios-terminate-app: func(bundle-id: string) -> result<string, string>;
}
/// Desktop automation: window management, screenshots, pixel/OCR probes,
/// mouse, keyboard, clipboard and native file dialogs.
interface host-desktop {
    /// Activate an application given its bundle-id (macOS), exe name
    /// (Windows), or WM_CLASS (Linux).
    desktop-activate-app: func(bundle-id: string) -> result<string, string>;

    /// List every window of the target app. Returns a JSON array:
    /// [{"idx":1,"title":"...","x":0,"y":0,"w":900,"h":600}]
    desktop-list-windows: func(bundle-id: string) -> result<string, string>;

    /// Close one specific window, addressed by its index (as reported by
    /// list-windows).
    desktop-close-window: func(bundle-id: string, window-idx: u32) -> result<string, string>;

    /// Return the main window bounds (x, y, w, h) as JSON.
    /// Prefers title=="Weixin" and falls back to the largest window.
    desktop-get-main-window: func(bundle-id: string) -> result<string, string>;

    /// Screenshot the app's main window. Returns a
    /// data:image/png;base64,... data URI.
    desktop-screenshot-window: func(bundle-id: string) -> result<string, string>;

    /// Screenshot a region of the screen. Returns a data URI.
    desktop-screenshot-region: func(
        x: u32,
        y: u32,
        w: u32,
        h: u32
    ) -> result<string, string>;

    /// Scan an absolute screen region for pixels close to a target RGB
    /// colour (per-channel `tolerance`). A cross-platform pure-pixel test
    /// (no OCR/VLM), used to detect the *presence* of a red badge (the
    /// digit cannot be read, but the red cluster can be seen). Returns JSON
    /// `{"hit":bool,"count":u32,"total":u32,"ratio":float}` where
    /// `hit = count >= min-count`.
    desktop-region-has-color: func(
        x: u32,
        y: u32,
        w: u32,
        h: u32,
        r: u32,
        g: u32,
        b: u32,
        tolerance: u32,
        min-count: u32
    ) -> result<string, string>;

    /// OCR the app's main window (macOS Vision). Returns JSON
    /// `[{"text":"...","x":143,"y":699}, ...]` with 0-1000 relative centre
    /// coords -- precise enough to click a named row.
    desktop-ocr-window: func(bundle-id: string) -> result<string, string>;

    /// Move the mouse cursor to absolute screen coordinates (x, y) without
    /// clicking.
    desktop-mouse-move: func(x: u32, y: u32) -> result<string, string>;

    /// Left-click the mouse at absolute screen coordinates (x, y).
    desktop-mouse-click: func(x: u32, y: u32) -> result<string, string>;

    /// Double-click the mouse at absolute screen coordinates (x, y).
    desktop-mouse-double-click: func(x: u32, y: u32) -> result<string, string>;

    /// Drag from (x1, y1) to (x2, y2).
    desktop-mouse-drag: func(
        x1: u32,
        y1: u32,
        x2: u32,
        y2: u32
    ) -> result<string, string>;

    /// Scroll the mouse wheel. Positive clicks = down, negative = up.
    desktop-mouse-scroll: func(clicks: s32) -> result<string, string>;

    /// Press a key, optionally with modifiers.
    /// key: key name (e.g. "Return", "Escape", "v", "k").
    /// modifiers: list of "command", "control", "shift", "option" / "alt".
    desktop-key-press: func(key: string, modifiers: list<string>) -> result<string, string>;

    /// Set the text of the system clipboard.
    desktop-clipboard-set: func(text: string) -> result<string, string>;

    /// Get the text of the system clipboard.
    desktop-clipboard-get: func() -> result<string, string>;

    /// Put a file reference on the system clipboard (for paste-as-file).
    desktop-clipboard-set-file: func(file-path: string) -> result<string, string>;

    /// Get image data from the system clipboard (PNG, returned as a data
    /// URI). Returns empty/error when the clipboard holds no image.
    desktop-clipboard-get-image: func() -> result<string, string>;

    /// Right-click the mouse at absolute screen coordinates (x, y).
    desktop-mouse-right-click: func(x: u32, y: u32) -> result<string, string>;

    /// Open a native file dialog. Returns the selected file path, or an
    /// error.
    /// NOTE(review): the format of `filters` entries is not specified here.
    desktop-file-dialog-open: func(title: string, filters: list<string>) -> result<string, string>;
}
/// Vision-language model access.
interface host-vlm {
    /// Parse an image with a vision-language model.
    /// `image-data-uri` must use the data:image/png;base64,... format.
    /// `prompt` is the task description that is sent to the VLM.
    /// The VLM's raw text output is returned.
    /// NOTE(review): `max-tokens` is presumably an output-token cap — confirm.
    vlm-parse: func(
        image-data-uri: string,
        prompt: string,
        max-tokens: u32
    ) -> result<string, string>;
}
/// The interface a plugin implements; the `jimeng-plugin` world exports it.
interface plugin-api {
    /// Handle an invocation of the tool named `tool-name`.
    /// NOTE(review): `args-json` is presumably the JSON-encoded tool
    /// arguments; the result payload format is not specified in this file.
    handle-tool: func(tool-name: string, args-json: string) -> result<string, string>;
}
/// Component world for the plugin: it imports the host interfaces listed
/// below and exports `plugin-api`.
world jimeng-plugin {
    // Host capabilities made available to the guest.
    import host-browser;
    import host-runtime;
    import host-config;
    import host-context;
    import host-http;
    import host-kv;
    import host-device;
    import host-background;
    import host-storage;
    import host-media;
    import host-android;
    import host-ios;
    import host-desktop;
    import host-vlm;

    // Entry point implemented by the guest.
    export plugin-api;
}