pub struct RemoteMultimodalConfigs {
pub api_url: String,
pub api_key: Option<String>,
pub model_name: String,
pub system_prompt: Option<String>,
pub system_prompt_extra: Option<String>,
pub user_message_extra: Option<String>,
pub cfg: RemoteMultimodalConfig,
pub prompt_url_gate: Option<PromptUrlGate>,
pub concurrency_limit: Option<usize>,
pub vision_model: Option<ModelEndpoint>,
pub text_model: Option<ModelEndpoint>,
pub vision_route_mode: VisionRouteMode,
pub use_chrome_ai: bool,
pub chrome_ai_max_user_chars: usize,
pub semaphore: OnceLock<Arc<Semaphore>>,
pub relevance_credits: Arc<AtomicU32>,
pub url_prefilter_cache: Arc<DashMap<String, bool>>,
}

Top-level configuration bundle for remote multimodal automation.

This struct combines all the settings needed to drive the RemoteMultimodalEngine:

- API connection (api_url, api_key, model_name)
- Prompt configuration (system_prompt, system_prompt_extra, user_message_extra)
- Runtime configuration (RemoteMultimodalConfig)
- URL gating (PromptUrlGate)
- Dual-model routing (vision_model, text_model, vision_route_mode)
- Chrome AI (use_chrome_ai, chrome_ai_max_user_chars)
- Skills (feature-gated skill_registry, s3_skill_source)
- Concurrency (concurrency_limit, lazy semaphore)
- Relevance tracking (relevance_credits, url_prefilter_cache)
§Example

use spider_agent::automation::RemoteMultimodalConfigs;

let mm = RemoteMultimodalConfigs::new(
    "https://openrouter.ai/api/v1/chat/completions",
    "qwen/qwen-2-vl-72b-instruct",
)
.with_api_key("sk-or-...")
.with_concurrency_limit(5);

Fields§
§api_url: String
OpenAI-compatible chat completions URL.

§api_key: Option<String>
Optional bearer key, sent as Authorization: Bearer <api_key>.

§model_name: String
Model name/id for the target endpoint.

§system_prompt: Option<String>
Optional base system prompt (None => engine default).

§system_prompt_extra: Option<String>
Optional extra system instructions appended at runtime.

§user_message_extra: Option<String>
Optional extra user instructions appended at runtime.

§cfg: RemoteMultimodalConfig
Runtime knobs (capture policies, retry, looping, etc.).

§prompt_url_gate: Option<PromptUrlGate>
Optional URL gating and per-URL overrides.

§concurrency_limit: Option<usize>
Optional concurrency limit for remote inference calls.

§vision_model: Option<ModelEndpoint>
Optional vision model endpoint for dual-model routing. When set alongside
text_model, the engine routes per round based on VisionRouteMode.

§text_model: Option<ModelEndpoint>
Optional text-only model endpoint for dual-model routing.

§vision_route_mode: VisionRouteMode
Routing mode controlling when the vision vs. text model is used.

§use_chrome_ai: bool
Use Chrome's built-in LanguageModel API (Gemini Nano) for inference.

When true, the automation loop evaluates JavaScript on the page via
page.evaluate(), calling LanguageModel.create() + session.prompt()
instead of making HTTP API calls. This enables running the agent
without any external API key.

When left false (the default), Chrome AI is still used as a last-resort
fallback if both api_url and api_key are empty.

Requires Chrome with built-in AI enabled:

- chrome://flags/#optimization-guide-on-device-model → Enabled
- chrome://flags/#prompt-api-for-gemini-nano → Enabled

§chrome_ai_max_user_chars: usize
Maximum user-prompt characters for Chrome AI inference.

Gemini Nano has limited context compared to cloud models. This budget
controls the max length of the user message (HTML context, URL, title,
task instructions). When the user prompt exceeds this limit, the HTML
context section is truncated while preserving task instructions and memory.

Default: 6000 chars. Only used when Chrome AI is the active inference path.

§semaphore: OnceLock<Arc<Semaphore>>
Lazily initialized semaphore used for concurrency limiting.

§relevance_credits: Arc<AtomicU32>
Counter for pages deemed irrelevant; each unit is one budget credit to restore.

§url_prefilter_cache: Arc<DashMap<String, bool>>
Cache of URL path → relevance classification, to avoid re-classifying.
Implementations§

impl RemoteMultimodalConfigs

pub fn new(api_url: impl Into<String>, model_name: impl Into<String>) -> Self

Create a new remote multimodal config bundle.

This sets the minimum required fields:

- api_url: the OpenAI-compatible /v1/chat/completions endpoint
- model_name: the model identifier understood by that endpoint

All other fields fall back to Default::default.

§Example

use spider_agent::automation::RemoteMultimodalConfigs;

let mm = RemoteMultimodalConfigs::new(
    "http://localhost:11434/v1/chat/completions",
    "qwen2.5-vl",
);

pub fn get_or_init_semaphore(&self) -> Option<Arc<Semaphore>>
Get (and lazily init) the shared semaphore from concurrency_limit.
This is safe to call concurrently; OnceLock handles the race.
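The lazy-init pattern behind this method can be sketched with std types alone. A minimal, dependency-free sketch is below; the `Semaphore` here is a stand-in for the real async semaphore (likely tokio's), and the `Cfg` struct mirrors only the two relevant fields:

```rust
use std::sync::{Arc, OnceLock};

// Stand-in for the real async semaphore type, so the sketch stays
// dependency-free. Only the permit count matters for the pattern.
struct Semaphore {
    permits: usize,
}

struct Cfg {
    concurrency_limit: Option<usize>,
    semaphore: OnceLock<Arc<Semaphore>>,
}

impl Cfg {
    // Lazily create the shared semaphore. OnceLock::get_or_init makes
    // concurrent first calls race-free; every caller gets the same Arc.
    fn get_or_init_semaphore(&self) -> Option<Arc<Semaphore>> {
        let limit = self.concurrency_limit?;
        Some(Arc::clone(self.semaphore.get_or_init(|| {
            Arc::new(Semaphore { permits: limit })
        })))
    }
}

fn main() {
    let cfg = Cfg { concurrency_limit: Some(5), semaphore: OnceLock::new() };
    let a = cfg.get_or_init_semaphore().unwrap();
    let b = cfg.get_or_init_semaphore().unwrap();
    // Both calls observe the same semaphore instance.
    assert!(Arc::ptr_eq(&a, &b));
    println!("permits = {}", a.permits);
}
```

With `concurrency_limit` set to `None`, the method returns `None` and no semaphore is ever created, which matches the field being optional.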
pub fn with_api_key(self, key: impl Into<String>) -> Self

Attach an optional API key for authenticated endpoints.

When set, the engine will send:

Authorization: Bearer <api_key>
pub fn with_system_prompt(self, prompt: impl Into<String>) -> Self

Set the base system prompt for the model.

- Some(prompt) uses your prompt as the base system prompt.
- None means the engine should use its built-in default system prompt.
pub fn with_system_prompt_extra(self, extra: impl Into<String>) -> Self
Append additional system-level instructions.
This is appended after the base system prompt and before any runtime config summary the engine might embed.
pub fn with_user_message_extra(self, extra: impl Into<String>) -> Self
Append additional user instructions for the task.
This is appended to the user message after the captured page context.
pub fn with_cfg(self, cfg: RemoteMultimodalConfig) -> Self
Replace the runtime automation configuration.
pub fn with_prompt_url_gate(self, gate: PromptUrlGate) -> Self
Set optional URL gating and per-URL overrides.
pub fn with_concurrency_limit(self, limit: usize) -> Self
Set an optional concurrency limit for remote inference calls.
pub fn with_extra_ai_data(self, enabled: bool) -> Self
Enable extraction mode to return structured data from pages.
pub fn with_extraction_prompt(self, prompt: impl Into<String>) -> Self
Set a custom extraction prompt.
pub fn with_screenshot(self, enabled: bool) -> Self
Enable screenshot capture after automation completes.
pub fn with_extraction_schema(self, schema: ExtractionSchema) -> Self
Set a JSON schema for structured extraction output.
pub fn model_supports_vision(&self) -> bool

Check if the configured model supports vision/multimodal input.

Uses the supports_vision function to detect support based on the model name.
pub fn should_include_screenshot(&self) -> bool

Determine whether to include screenshots in LLM requests.

This respects the include_screenshot config override:

- Some(true): always include screenshots
- Some(false): never include screenshots
- None: auto-detect based on the model name
pub fn filter_screenshot<'a>(
    &self,
    screenshot: Option<&'a str>,
) -> Option<&'a str>

Filter the screenshot based on model capabilities.

Returns the screenshot if the model supports vision and screenshots
are enabled; otherwise returns None.
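The interplay of the tri-state override and vision auto-detection can be restated as free functions. This is an illustrative sketch, not the crate's code: the name-based vision check is a toy heuristic standing in for the real supports_vision function, and the parameter names are assumptions.

```rust
// Toy heuristic standing in for the crate's supports_vision check.
fn model_supports_vision(model_name: &str) -> bool {
    model_name.contains("vl") || model_name.contains("vision")
}

// Tri-state override: an explicit Some(..) wins, None falls back to
// auto-detection from the model name.
fn should_include_screenshot(include_screenshot: Option<bool>, model_name: &str) -> bool {
    match include_screenshot {
        Some(explicit) => explicit,
        None => model_supports_vision(model_name),
    }
}

// Pass the screenshot through only when it will actually be used.
fn filter_screenshot<'a>(
    include_screenshot: Option<bool>,
    model_name: &str,
    screenshot: Option<&'a str>,
) -> Option<&'a str> {
    if should_include_screenshot(include_screenshot, model_name) {
        screenshot
    } else {
        None
    }
}

fn main() {
    // Vision model, no override: the screenshot passes through.
    assert_eq!(filter_screenshot(None, "qwen2.5-vl", Some("b64...")), Some("b64..."));
    // Text-only model, no override: the screenshot is dropped.
    assert_eq!(filter_screenshot(None, "llama-3-8b", Some("b64...")), None);
    // An explicit override always wins.
    assert_eq!(filter_screenshot(Some(false), "qwen2.5-vl", Some("b64...")), None);
    println!("ok");
}
```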
pub fn with_vision_model(self, endpoint: ModelEndpoint) -> Self
Set the vision model endpoint for dual-model routing.
pub fn with_text_model(self, endpoint: ModelEndpoint) -> Self
Set the text model endpoint for dual-model routing.
pub fn with_vision_route_mode(self, mode: VisionRouteMode) -> Self
Set the vision routing mode.
pub fn with_dual_models(self, vision: ModelEndpoint, text: ModelEndpoint) -> Self
Convenience: set both vision and text model endpoints at once.
pub fn with_relevance_gate(self, prompt: Option<String>) -> Self
Enable relevance gating with optional custom criteria prompt.
pub fn with_url_prefilter(self, batch_size: Option<usize>) -> Self
Enable URL-level pre-filtering before HTTP fetch.
Requires relevance_gate to also be enabled.
pub fn with_chrome_ai(self, enabled: bool) -> Self
Enable Chrome built-in AI (LanguageModel / Gemini Nano) for inference.
When enabled, the engine uses page.evaluate() to call Chrome’s
LanguageModel.create() + session.prompt() instead of HTTP API calls.
No API key is required.
Even when not explicitly enabled, Chrome AI is used as a last-resort
fallback if both api_url and api_key are empty.
pub fn with_chrome_ai_max_user_chars(self, chars: usize) -> Self
Set the maximum user-prompt character budget for Chrome AI inference.
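The budgeting behavior described for chrome_ai_max_user_chars (truncate only the HTML context, keep the task instructions) can be sketched as a small free function. The function name, argument layout, and separator are illustrative assumptions, not the crate's internals:

```rust
// Sketch of the character-budget idea: when the assembled user prompt is
// too long for Gemini Nano, shrink only the HTML context section and keep
// the task instructions intact.
fn budget_user_prompt(task: &str, html_context: &str, max_chars: usize) -> String {
    let full = format!("{task}\n\n{html_context}");
    if full.chars().count() <= max_chars {
        return full;
    }
    // Reserve room for the task text plus the two-char separator,
    // then truncate the HTML section to whatever budget remains.
    let task_len = task.chars().count() + 2;
    let html_budget = max_chars.saturating_sub(task_len);
    let truncated: String = html_context.chars().take(html_budget).collect();
    format!("{task}\n\n{truncated}")
}

fn main() {
    let prompt = budget_user_prompt("Find the pricing page", &"x".repeat(10_000), 6000);
    // The result fits the budget and still starts with the instructions.
    assert!(prompt.chars().count() <= 6000);
    assert!(prompt.starts_with("Find the pricing page"));
    println!("len = {}", prompt.chars().count());
}
```

Counting `chars()` rather than bytes keeps the budget meaningful for multi-byte UTF-8 content; the real implementation may count differently.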
pub fn should_use_chrome_ai(&self) -> bool
Whether Chrome AI should be used for inference in this configuration.
Returns true when explicitly enabled OR when no API endpoint is
configured (last-resort fallback).
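The decision rule is simple enough to restate as a free function over the three inputs it depends on. This is a minimal restatement of the documented behavior, with the method's fields flattened into parameters:

```rust
// Chrome AI runs when explicitly enabled, or as a last-resort fallback
// when neither an API URL nor an API key is configured.
fn should_use_chrome_ai(use_chrome_ai: bool, api_url: &str, api_key: Option<&str>) -> bool {
    use_chrome_ai || (api_url.is_empty() && api_key.map_or(true, |k| k.is_empty()))
}

fn main() {
    // Explicitly enabled: always used, even with an endpoint configured.
    assert!(should_use_chrome_ai(true, "https://api.example/v1", Some("sk-...")));
    // Nothing configured: last-resort fallback kicks in.
    assert!(should_use_chrome_ai(false, "", None));
    // Endpoint configured and not enabled: stick to HTTP inference.
    assert!(!should_use_chrome_ai(false, "https://api.example/v1", None));
    println!("ok");
}
```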
pub fn has_dual_model_routing(&self) -> bool
Whether dual-model routing is active
(at least one of vision_model / text_model is configured).
pub fn resolve_model_for_round(
    &self,
    use_vision: bool,
) -> (&str, &str, Option<&str>)

Resolve the (api_url, model_name, api_key) triple for the current round.

- use_vision == true → prefer vision_model, falling back to the primary model.
- use_vision == false → prefer text_model, falling back to the primary model.
Fields left as None on the chosen ModelEndpoint inherit from
the parent (self.api_url / self.api_key).
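The fallback-and-inherit rule can be sketched with stub structs. Field names follow the docs above, but the exact shapes of ModelEndpoint and the config are assumptions made for illustration:

```rust
// Stub endpoint: fields left as None inherit from the parent config.
#[derive(Default)]
struct ModelEndpoint {
    api_url: Option<String>,
    model_name: String,
    api_key: Option<String>,
}

struct Cfg {
    api_url: String,
    model_name: String,
    api_key: Option<String>,
    vision_model: Option<ModelEndpoint>,
    text_model: Option<ModelEndpoint>,
}

impl Cfg {
    fn resolve_model_for_round(&self, use_vision: bool) -> (&str, &str, Option<&str>) {
        let ep = if use_vision { self.vision_model.as_ref() } else { self.text_model.as_ref() };
        match ep {
            // Chosen endpoint: take what it sets, inherit the rest.
            Some(e) => (
                e.api_url.as_deref().unwrap_or(self.api_url.as_str()),
                e.model_name.as_str(),
                e.api_key.as_deref().or(self.api_key.as_deref()),
            ),
            // No endpoint for this branch: fall back to the primary model.
            None => (self.api_url.as_str(), self.model_name.as_str(), self.api_key.as_deref()),
        }
    }
}

fn main() {
    let cfg = Cfg {
        api_url: "https://primary/v1/chat/completions".into(),
        model_name: "primary-model".into(),
        api_key: Some("sk-primary".into()),
        vision_model: Some(ModelEndpoint {
            model_name: "vision-model".into(),
            ..Default::default()
        }),
        text_model: None,
    };
    // The vision endpoint set only a model name, so URL and key are inherited.
    let (url, model, key) = cfg.resolve_model_for_round(true);
    assert_eq!((url, model, key),
        ("https://primary/v1/chat/completions", "vision-model", Some("sk-primary")));
    // No text endpoint configured: the text round uses the primary model.
    assert_eq!(cfg.resolve_model_for_round(false).1, "primary-model");
    println!("ok");
}
```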
pub fn should_use_vision_this_round(
    &self,
    round_idx: usize,
    stagnated: bool,
    action_stuck_rounds: usize,
    force_vision: bool,
) -> bool
Decide whether to use vision this round, based on the configured
VisionRouteMode and current loop state.
force_vision is an explicit per-round override (e.g. from request_vision).
Trait Implementations§

impl Clone for RemoteMultimodalConfigs
impl Debug for RemoteMultimodalConfigs
impl Default for RemoteMultimodalConfigs
impl<'de> Deserialize<'de> for RemoteMultimodalConfigs where RemoteMultimodalConfigs: Default
impl PartialEq for RemoteMultimodalConfigs
impl Serialize for RemoteMultimodalConfigs
impl Eq for RemoteMultimodalConfigs
Auto Trait Implementations§
impl !Freeze for RemoteMultimodalConfigs
impl !RefUnwindSafe for RemoteMultimodalConfigs
impl Send for RemoteMultimodalConfigs
impl Sync for RemoteMultimodalConfigs
impl Unpin for RemoteMultimodalConfigs
impl !UnwindSafe for RemoteMultimodalConfigs
Blanket Implementations§

impl<T> BorrowMut<T> for T where T: ?Sized
impl<T> CloneToUninit for T where T: Clone
impl<Q, K> Equivalent<K> for Q