use std::time::Duration;
use crate::ContentAnalysis;
/// Recovery strategy selector stored in `AutomationConfig::recovery_strategy`.
///
/// This file only declares the variants and the default (`Retry`); what each
/// variant actually triggers is decided by the code that consumes the enum.
/// The per-variant notes below are inferred from the names — confirm against
/// the consumer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
pub enum RecoveryStrategy {
/// Default variant; presumably re-attempts the failed operation.
#[default]
Retry,
/// Presumably tries a different approach instead of repeating the same one.
Alternative,
/// Presumably continues without the failed step.
Skip,
/// Presumably stops the run.
Abort,
}
/// Retry settings: an attempt limit, a fixed backoff and two switches that
/// select which failure kinds are retried.
///
/// Defaults (see the `Default` impl): 3 attempts, 1000 ms backoff, both
/// switches on. `RetryPolicy::none()` sets 1 attempt, 0 ms and both switches off.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct RetryPolicy {
/// Attempt limit. `none()` uses 1 for "no retries", which suggests this counts
/// total attempts rather than extra retries — confirm against the consumer.
pub max_attempts: usize,
/// Backoff in milliseconds; exposed as a `Duration` by `backoff_duration()`.
pub backoff_ms: u64,
/// Whether a parse error should be retried (interpreted by the consumer).
pub retry_on_parse_error: bool,
/// Whether a step failure should be retried (interpreted by the consumer).
pub retry_on_step_failure: bool,
}
impl Default for RetryPolicy {
    /// Stock policy: three attempts, a 1000 ms backoff, and retries enabled for
    /// both parse errors and step failures.
    fn default() -> Self {
        const ATTEMPTS: usize = 3;
        const BACKOFF_MS: u64 = 1_000;
        let retry_enabled = true;

        Self {
            retry_on_step_failure: retry_enabled,
            retry_on_parse_error: retry_enabled,
            backoff_ms: BACKOFF_MS,
            max_attempts: ATTEMPTS,
        }
    }
}
impl RetryPolicy {
    /// Default policy with `max_attempts` replaced by the given value.
    pub fn new(max_attempts: usize) -> Self {
        let mut policy = Self::default();
        policy.max_attempts = max_attempts;
        policy
    }

    /// Policy with retrying switched off: a single attempt, zero backoff and
    /// both retry flags cleared.
    pub fn none() -> Self {
        Self {
            retry_on_step_failure: false,
            retry_on_parse_error: false,
            backoff_ms: 0,
            max_attempts: 1,
        }
    }

    /// Returns the policy with its backoff set to `ms` milliseconds.
    pub fn with_backoff(self, ms: u64) -> Self {
        Self {
            backoff_ms: ms,
            ..self
        }
    }

    /// The configured backoff expressed as a [`Duration`].
    pub fn backoff_duration(&self) -> Duration {
        let millis = self.backoff_ms;
        Duration::from_millis(millis)
    }
}
/// Three-level cost tier; `Medium` is the default.
///
/// `ModelPolicy::model_for_tier` maps a tier to a model name, and
/// `ModelPolicy::max_cost_tier` stores one as a ceiling.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
pub enum CostTier {
/// Mapped to `ModelPolicy::small` by `model_for_tier`.
Low,
/// Default tier; mapped to `ModelPolicy::medium` by `model_for_tier`.
#[default]
Medium,
/// Mapped to `ModelPolicy::large` when `allow_large` is set, otherwise to `medium`.
High,
}
/// Names of the models to use per size class, plus limits on model choice.
///
/// Defaults (see the `Default` impl): `gpt-4o-mini` / `gpt-4o` / `gpt-4o`,
/// large models allowed, no latency limit, cost ceiling `High`.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct ModelPolicy {
/// Model name returned for `CostTier::Low`.
pub small: String,
/// Model name returned for `CostTier::Medium`, and for `High` when `allow_large` is false.
pub medium: String,
/// Model name returned for `CostTier::High` when `allow_large` is true.
pub large: String,
/// Gate consulted by `model_for_tier` before handing out `large`.
pub allow_large: bool,
/// Optional latency limit in milliseconds. Not read anywhere in this file —
/// presumably enforced by the consumer.
pub max_latency_ms: Option<u64>,
/// Cost ceiling. NOTE(review): `model_for_tier` does not consult this field;
/// confirm the consumer applies it.
pub max_cost_tier: CostTier,
}
impl Default for ModelPolicy {
    /// Stock policy: `gpt-4o-mini` as the small model, `gpt-4o` for both medium
    /// and large, large models allowed, no latency limit, cost ceiling `High`.
    fn default() -> Self {
        // Medium and large share the same model name by default.
        let flagship = String::from("gpt-4o");

        Self {
            small: String::from("gpt-4o-mini"),
            medium: flagship.clone(),
            large: flagship,
            allow_large: true,
            max_latency_ms: None,
            max_cost_tier: CostTier::High,
        }
    }
}
impl ModelPolicy {
    /// Picks the model name for `tier`.
    ///
    /// `Low` yields `small`, `Medium` yields `medium`, and `High` yields `large`
    /// only while `allow_large` is set — otherwise it falls back to `medium`.
    pub fn model_for_tier(&self, tier: CostTier) -> &str {
        let chosen = match (tier, self.allow_large) {
            (CostTier::Low, _) => &self.small,
            (CostTier::High, true) => &self.large,
            (CostTier::Medium, _) | (CostTier::High, false) => &self.medium,
        };
        chosen.as_str()
    }

    /// Returns the policy with `small` replaced by `model`.
    pub fn with_small(self, model: impl Into<String>) -> Self {
        Self {
            small: model.into(),
            ..self
        }
    }

    /// Returns the policy with `medium` replaced by `model`.
    pub fn with_medium(self, model: impl Into<String>) -> Self {
        Self {
            medium: model.into(),
            ..self
        }
    }

    /// Returns the policy with `large` replaced by `model`.
    pub fn with_large(self, model: impl Into<String>) -> Self {
        Self {
            large: model.into(),
            ..self
        }
    }
}
/// A model name with optional per-model API URL and API key overrides.
///
/// Both optional fields are left out of the serialized form when `None`.
/// NOTE(review): the derived `Debug` and `Serialize` impls emit `api_key`
/// verbatim — take care when logging or persisting values of this type.
#[derive(Debug, Clone, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
pub struct ModelEndpoint {
/// Name of the model.
pub model_name: String,
/// Optional API URL; presumably overrides a globally configured URL — confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub api_url: Option<String>,
/// Optional API key; presumably overrides a globally configured key — confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub api_key: Option<String>,
}
impl ModelEndpoint {
    /// Creates an endpoint for `model_name` with no URL or key override.
    pub fn new(model_name: impl Into<String>) -> Self {
        let mut endpoint = Self::default();
        endpoint.model_name = model_name.into();
        endpoint
    }

    /// Returns the endpoint with `api_url` set to `url`.
    pub fn with_api_url(self, url: impl Into<String>) -> Self {
        Self {
            api_url: Some(url.into()),
            ..self
        }
    }

    /// Returns the endpoint with `api_key` set to `key`.
    pub fn with_api_key(self, key: impl Into<String>) -> Self {
        Self {
            api_key: Some(key.into()),
            ..self
        }
    }
}
/// Routing mode selector between (presumably) a text model and a vision model.
///
/// Only the variants and the default (`AlwaysPrimary`) are declared here; the
/// routing logic lives elsewhere, so the per-variant notes are inferred from
/// the names — confirm against the consumer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
pub enum VisionRouteMode {
/// Default variant; presumably always uses the primary model.
#[default]
AlwaysPrimary,
/// Presumably prefers the text path first.
TextFirst,
/// Presumably prefers the vision path first.
VisionFirst,
/// Presumably lets the agent decide per request.
AgentDriven,
}
/// Reasoning effort level; `Medium` is the default.
///
/// Serialized in lowercase (`"low"`, `"medium"`, `"high"`) because of
/// `rename_all = "lowercase"`. `reasoning_payload` emits the same strings, and
/// `effective_thinking_budget` maps the levels to 4096 / 8192 / 16384 tokens.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ReasoningEffort {
/// Lowest level.
Low,
/// Default level.
#[default]
Medium,
/// Highest level.
High,
}
/// HTML cleaning level; `Default` is the default variant.
///
/// The cleaning itself is implemented elsewhere. What this file establishes per
/// variant is limited to the helper methods on the type: `removes_svgs()` is
/// true for `Slim` and `Aggressive`, `removes_media()` is true for `Slim` only,
/// and `estimate_savings()` gives a per-variant byte estimate.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
pub enum HtmlCleaningProfile {
/// Default level.
#[default]
Default,
/// Reported by `removes_svgs()` as removing SVGs; has the largest savings estimate.
Aggressive,
/// Reported as removing both SVGs and media.
Slim,
/// Savings estimate covers script and style bytes only.
Minimal,
/// Savings estimate is 0; presumably leaves the HTML untouched.
Raw,
/// Savings estimate is 0; presumably resolved to a concrete profile via
/// `from_content_analysis` before use — confirm against the consumer.
Auto,
}
impl HtmlCleaningProfile {
    /// SVG byte count above which every intent selects `Slim`.
    const SVG_HEAVY_THRESHOLD: usize = 50_000;
    /// SVG byte count checked first, before the intent is looked at.
    const SVG_VERY_HEAVY_THRESHOLD: usize = 100_000;
    /// Base64 byte count above which `Slim` is selected regardless of intent.
    const BASE64_HEAVY_THRESHOLD: usize = 100_000;
    /// Script byte count above which extraction selects `Aggressive`.
    const SCRIPT_HEAVY_THRESHOLD: usize = 200_000;
    /// Cleanable ratio above which `Slim` is selected regardless of intent.
    const CLEANABLE_RATIO_HIGH: f32 = 0.4;
    /// Cleanable ratio above which each intent applies its own stronger profile.
    const CLEANABLE_RATIO_MEDIUM: f32 = 0.25;

    /// Chooses a profile for `analysis` using the `General` intent.
    pub fn from_content_analysis(analysis: &ContentAnalysis) -> Self {
        let intent = CleaningIntent::General;
        Self::from_content_analysis_with_intent(analysis, intent)
    }

    /// Chooses a profile for `analysis`, tuned by `intent`.
    ///
    /// The checks run in a fixed priority order and the first match wins:
    /// 1. Very heavy SVG, heavy base64 or a high cleanable ratio selects `Slim`.
    /// 2. Heavy SVG selects `Slim` (every intent tests this first).
    /// 3. The intent-specific rules below.
    pub fn from_content_analysis_with_intent(
        analysis: &ContentAnalysis,
        intent: CleaningIntent,
    ) -> Self {
        let overloaded = analysis.svg_bytes > Self::SVG_VERY_HEAVY_THRESHOLD
            || analysis.base64_bytes > Self::BASE64_HEAVY_THRESHOLD
            || analysis.cleanable_ratio > Self::CLEANABLE_RATIO_HIGH;
        let svg_heavy = analysis.svg_bytes > Self::SVG_HEAVY_THRESHOLD;
        if overloaded || svg_heavy {
            return HtmlCleaningProfile::Slim;
        }

        let ratio_medium = analysis.cleanable_ratio > Self::CLEANABLE_RATIO_MEDIUM;

        match intent {
            CleaningIntent::Extraction => {
                // Size-driven escalation outranks the ratio and media checks.
                if analysis.script_bytes > Self::SCRIPT_HEAVY_THRESHOLD
                    || analysis.html_length > 100_000
                {
                    HtmlCleaningProfile::Aggressive
                } else if ratio_medium
                    || analysis.canvas_count > 0
                    || analysis.video_count > 1
                    || analysis.embed_count > 0
                {
                    HtmlCleaningProfile::Slim
                } else if analysis.text_ratio < 0.1 && analysis.html_length > 30_000 {
                    HtmlCleaningProfile::Aggressive
                } else {
                    HtmlCleaningProfile::Slim
                }
            }
            CleaningIntent::Action => {
                if ratio_medium || analysis.html_length > 150_000 {
                    HtmlCleaningProfile::Default
                } else {
                    HtmlCleaningProfile::Minimal
                }
            }
            CleaningIntent::General => {
                if ratio_medium || analysis.canvas_count > 0 || analysis.video_count > 2 {
                    HtmlCleaningProfile::Slim
                } else if analysis.text_ratio < 0.05 && analysis.html_length > 50_000 {
                    // Checked before embeds: a large, text-poor page wins.
                    HtmlCleaningProfile::Aggressive
                } else if analysis.embed_count > 0 {
                    HtmlCleaningProfile::Slim
                } else if (analysis.html_length > 100_000 && analysis.text_ratio < 0.15)
                    || analysis.html_length > 30_000
                {
                    HtmlCleaningProfile::Default
                } else {
                    HtmlCleaningProfile::Minimal
                }
            }
        }
    }

    /// True for the profiles that strip SVGs: `Slim` and `Aggressive`.
    pub fn removes_svgs(&self) -> bool {
        match self {
            HtmlCleaningProfile::Slim | HtmlCleaningProfile::Aggressive => true,
            HtmlCleaningProfile::Default
            | HtmlCleaningProfile::Minimal
            | HtmlCleaningProfile::Raw
            | HtmlCleaningProfile::Auto => false,
        }
    }

    /// True only for `Slim`.
    pub fn removes_media(&self) -> bool {
        match self {
            HtmlCleaningProfile::Slim => true,
            HtmlCleaningProfile::Default
            | HtmlCleaningProfile::Aggressive
            | HtmlCleaningProfile::Minimal
            | HtmlCleaningProfile::Raw
            | HtmlCleaningProfile::Auto => false,
        }
    }

    /// Estimated number of bytes this profile removes from the analysed page.
    ///
    /// `Raw` and `Auto` report 0.
    pub fn estimate_savings(&self, analysis: &ContentAnalysis) -> usize {
        match self {
            HtmlCleaningProfile::Raw | HtmlCleaningProfile::Auto => 0,
            HtmlCleaningProfile::Slim => analysis.cleanable_bytes,
            HtmlCleaningProfile::Minimal => analysis.script_bytes + analysis.style_bytes,
            HtmlCleaningProfile::Default => {
                analysis.script_bytes + analysis.style_bytes + (analysis.base64_bytes / 2)
            }
            HtmlCleaningProfile::Aggressive => {
                analysis.cleanable_bytes + (analysis.html_length / 10)
            }
        }
    }
}
/// What the cleaned HTML will be used for; `General` is the default.
///
/// `HtmlCleaningProfile::from_content_analysis_with_intent` applies a different
/// rule set per variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
pub enum CleaningIntent {
/// Default intent; falls back to `Minimal` when nothing else matches.
#[default]
General,
/// Falls back to `Slim` when nothing else matches.
Extraction,
/// Falls back to `Minimal`, escalating only to `Default` (or `Slim` for heavy SVG).
Action,
}
/// Settings for one capture of a page: screenshot options plus HTML cleaning limits.
///
/// Defaults (see the `Default` impl): full page, background omitted, no clip,
/// `HtmlCleaningProfile::Default`, 24_000 HTML bytes, no note.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct CaptureProfile {
/// Presumably captures the full page rather than only the viewport — confirm.
pub full_page: bool,
/// Presumably omits the page background from the screenshot — confirm.
pub omit_background: bool,
/// Optional rectangle; presumably restricts the capture to that region.
pub clip: Option<ClipViewport>,
/// Cleaning level to apply to the HTML.
pub html_cleaning: HtmlCleaningProfile,
/// Upper bound on HTML size in bytes (how it is enforced is not visible here).
pub html_max_bytes: usize,
/// Optional free-form note attached to this attempt.
pub attempt_note: Option<String>,
}
impl Default for CaptureProfile {
    /// Stock profile: full-page capture with the background omitted, no clip,
    /// the `Default` cleaning level, a 24_000-byte HTML cap and no note.
    fn default() -> Self {
        const HTML_BUDGET_BYTES: usize = 24_000;

        Self {
            attempt_note: None,
            html_max_bytes: HTML_BUDGET_BYTES,
            html_cleaning: HtmlCleaningProfile::Default,
            clip: None,
            omit_background: true,
            full_page: true,
        }
    }
}
impl CaptureProfile {
    /// Default profile with the cleaning level raised to `Aggressive`.
    pub fn for_extraction() -> Self {
        let mut profile = Self::default();
        profile.html_cleaning = HtmlCleaningProfile::Aggressive;
        profile
    }

    /// Default profile with `Minimal` cleaning and `full_page` switched off.
    pub fn for_action() -> Self {
        let mut profile = Self::default();
        profile.html_cleaning = HtmlCleaningProfile::Minimal;
        profile.full_page = false;
        profile
    }

    /// Returns the profile with `html_max_bytes` set to `bytes`.
    pub fn with_max_bytes(self, bytes: usize) -> Self {
        Self {
            html_max_bytes: bytes,
            ..self
        }
    }
}
/// A rectangle given by an origin and a size, stored as `f64` values.
///
/// Used as `CaptureProfile::clip`. Units and coordinate space are not stated
/// in this file — presumably CSS pixels relative to the page; confirm.
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct ClipViewport {
/// Horizontal origin of the rectangle.
pub x: f64,
/// Vertical origin of the rectangle.
pub y: f64,
/// Width of the rectangle.
pub width: f64,
/// Height of the rectangle.
pub height: f64,
}
impl ClipViewport {
pub fn new(x: f64, y: f64, width: f64, height: f64) -> Self {
Self {
x,
y,
width,
height,
}
}
}
/// Configuration for one goal-driven automation run.
///
/// Build it with `AutomationConfig::new(goal)` and the `with_*` methods. The
/// three prompt-related `Option` fields at the end are left out of the
/// serialized form when `None`.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct AutomationConfig {
/// Free-form description of what the run should achieve.
pub goal: String,
/// Step limit (default 20).
pub max_steps: usize,
/// Overall timeout in milliseconds (default 600_000); see `timeout_duration()`.
pub timeout_ms: u64,
/// Recovery strategy selector (default `Retry`).
pub recovery_strategy: RecoveryStrategy,
/// Retry limit (default 3). NOTE(review): overlaps with
/// `retry_policy.max_attempts`; which one the consumer honours is not visible here.
pub max_retries: usize,
/// Cache switch (default true); what is cached is decided by the consumer.
pub use_cache: bool,
/// Screenshot capture switch (default true).
pub capture_screenshots: bool,
/// Substrings tested against a URL by `is_success_url()`.
pub success_urls: Vec<String>,
/// Substrings tested against text by `matches_success_pattern()`.
pub success_patterns: Vec<String>,
/// Set to true by `with_extraction()`; presumably triggers an extraction once
/// the run succeeds — confirm against the consumer.
pub extract_on_success: bool,
/// Prompt stored by `with_extraction()`.
pub extraction_prompt: Option<String>,
/// Capture settings used for the run.
pub capture_profile: CaptureProfile,
/// Retry settings used for the run.
pub retry_policy: RetryPolicy,
/// Model selection settings used for the run.
pub model_policy: ModelPolicy,
/// Optional system prompt; presumably replaces the built-in one — confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub system_prompt: Option<String>,
/// Optional extra text; presumably appended to the system prompt — confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub system_prompt_extra: Option<String>,
/// Optional extra text; presumably appended to the user message — confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub user_message_extra: Option<String>,
}
impl Default for AutomationConfig {
    /// Stock configuration: empty goal, 20 steps, a 600_000 ms timeout, `Retry`
    /// recovery with 3 retries, caching and screenshots on, no success
    /// conditions, no extraction, default sub-policies and no prompt overrides.
    fn default() -> Self {
        const MAX_STEPS: usize = 20;
        const TIMEOUT_MS: u64 = 600_000;
        const MAX_RETRIES: usize = 3;

        Self {
            goal: String::default(),
            max_steps: MAX_STEPS,
            timeout_ms: TIMEOUT_MS,
            recovery_strategy: RecoveryStrategy::Retry,
            max_retries: MAX_RETRIES,
            use_cache: true,
            capture_screenshots: true,
            success_urls: Vec::default(),
            success_patterns: Vec::default(),
            extract_on_success: false,
            extraction_prompt: None,
            capture_profile: Default::default(),
            retry_policy: Default::default(),
            model_policy: Default::default(),
            system_prompt: None,
            system_prompt_extra: None,
            user_message_extra: None,
        }
    }
}
impl AutomationConfig {
pub fn new(goal: impl Into<String>) -> Self {
Self {
goal: goal.into(),
..Default::default()
}
}
pub fn with_max_steps(mut self, steps: usize) -> Self {
self.max_steps = steps;
self
}
pub fn with_timeout(mut self, ms: u64) -> Self {
self.timeout_ms = ms;
self
}
pub fn with_recovery(mut self, strategy: RecoveryStrategy) -> Self {
self.recovery_strategy = strategy;
self
}
pub fn with_retries(mut self, retries: usize) -> Self {
self.max_retries = retries;
self
}
pub fn with_cache(mut self, enabled: bool) -> Self {
self.use_cache = enabled;
self
}
pub fn with_screenshots(mut self, enabled: bool) -> Self {
self.capture_screenshots = enabled;
self
}
pub fn with_success_url(mut self, url: impl Into<String>) -> Self {
self.success_urls.push(url.into());
self
}
pub fn with_success_pattern(mut self, pattern: impl Into<String>) -> Self {
self.success_patterns.push(pattern.into());
self
}
pub fn with_extraction(mut self, prompt: impl Into<String>) -> Self {
self.extract_on_success = true;
self.extraction_prompt = Some(prompt.into());
self
}
pub fn with_capture_profile(mut self, profile: CaptureProfile) -> Self {
self.capture_profile = profile;
self
}
pub fn with_retry_policy(mut self, policy: RetryPolicy) -> Self {
self.retry_policy = policy;
self
}
pub fn with_model_policy(mut self, policy: ModelPolicy) -> Self {
self.model_policy = policy;
self
}
pub fn with_system_prompt(mut self, prompt: impl Into<String>) -> Self {
self.system_prompt = Some(prompt.into());
self
}
pub fn with_system_prompt_extra(mut self, extra: impl Into<String>) -> Self {
self.system_prompt_extra = Some(extra.into());
self
}
pub fn with_user_message_extra(mut self, extra: impl Into<String>) -> Self {
self.user_message_extra = Some(extra.into());
self
}
pub fn timeout_duration(&self) -> std::time::Duration {
std::time::Duration::from_millis(self.timeout_ms)
}
pub fn is_success_url(&self, url: &str) -> bool {
self.success_urls
.iter()
.any(|pattern| url.contains(pattern))
}
pub fn matches_success_pattern(&self, text: &str) -> bool {
self.success_patterns
.iter()
.any(|pattern| text.contains(pattern))
}
}
/// Configuration for the remote multimodal (LLM-driven) engine.
///
/// The container-level `#[serde(default)]` means any field missing from the
/// input is filled from the `Default` impl, except where a field names its own
/// default function. `Option` fields marked `skip_serializing_if` are left out
/// of the serialized form when `None`.
///
/// Field semantics beyond what the helper functions in this file show are
/// decided by the consuming engine; notes marked "presumably" are inferred
/// from names and should be confirmed there.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct RemoteMultimodalConfig {
/// Presumably: include page HTML in the request (default true).
pub include_html: bool,
/// Upper bound on included HTML, in bytes (default 24_000).
pub html_max_bytes: usize,
/// Presumably: include the page URL in the request (default true).
pub include_url: bool,
/// Presumably: include the page title in the request (default true).
pub include_title: bool,
/// Tri-state screenshot inclusion (default `None`); presumably `None` lets the
/// engine decide — confirm.
pub include_screenshot: Option<bool>,
/// Sampling temperature (default 0.1).
pub temperature: f32,
/// Token limit for a response (default 1024); `u16` caps it at 65_535.
pub max_tokens: u16,
/// Presumably: ask the model for a JSON object response (default true).
pub request_json_object: bool,
/// Presumably: try to recover JSON from imperfect output (default true).
pub best_effort_json_extract: bool,
/// Optional reasoning effort; consumed by `reasoning_payload` and, as a
/// fallback, by `effective_thinking_budget`.
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_effort: Option<ReasoningEffort>,
/// Optional explicit thinking budget in tokens; takes precedence over
/// `reasoning_effort` in `effective_thinking_budget`.
#[serde(skip_serializing_if = "Option::is_none")]
pub thinking_budget: Option<u32>,
/// Limit on skills per round (default 3 via `default_max_skills_per_round`).
#[serde(default = "default_max_skills_per_round")]
pub max_skills_per_round: usize,
/// Limit on skill context size in characters (default 4000).
#[serde(default = "default_max_skill_context_chars")]
pub max_skill_context_chars: usize,
/// Round limit (default 6); `is_extraction_only()` treats `<= 1` specially.
pub max_rounds: usize,
/// Retry settings.
pub retry: RetryPolicy,
/// Capture profiles (default empty); extended by `add_capture_profile`.
pub capture_profiles: Vec<CaptureProfile>,
/// Model selection settings.
pub model_policy: ModelPolicy,
/// Wait in milliseconds (default 350); presumably applied after executing a plan.
pub post_plan_wait_ms: u64,
/// Optional cap on concurrent in-flight requests (default `None`).
pub max_inflight_requests: Option<usize>,
/// Optional automation timeout in milliseconds (default `None`).
#[serde(skip_serializing_if = "Option::is_none")]
pub automation_timeout_ms: Option<u64>,
/// Extraction switch (default false); set by `with_extraction` and read by
/// `is_extraction_only()`.
pub extra_ai_data: bool,
/// Optional extraction prompt.
#[serde(skip_serializing_if = "Option::is_none")]
pub extraction_prompt: Option<String>,
/// Optional extraction schema.
#[serde(skip_serializing_if = "Option::is_none")]
pub extraction_schema: Option<super::ExtractionSchema>,
/// Screenshot switch (default true). NOTE(review): relationship to
/// `include_screenshot` is not visible in this file.
pub screenshot: bool,
/// Tool-calling mode (type default unless overridden).
#[serde(default)]
pub tool_calling_mode: super::tool_calling::ToolCallingMode,
/// HTML diff mode (type default unless overridden).
#[serde(default)]
pub html_diff_mode: super::html_diff::HtmlDiffMode,
/// Optional planning configuration (default `None`).
#[serde(skip_serializing_if = "Option::is_none")]
pub planning_mode: Option<super::planning::PlanningModeConfig>,
/// Optional synthesis configuration (default `None`).
#[serde(skip_serializing_if = "Option::is_none")]
pub synthesis_config: Option<super::synthesis::SynthesisConfig>,
/// Optional confidence-based retry strategy (default `None`).
#[serde(skip_serializing_if = "Option::is_none")]
pub confidence_strategy: Option<super::confidence::ConfidenceRetryStrategy>,
/// Optional self-healing configuration (default `None`).
#[serde(skip_serializing_if = "Option::is_none")]
pub self_healing: Option<super::self_healing::SelfHealingConfig>,
/// Concurrent execution switch (default false).
#[serde(default)]
pub concurrent_execution: bool,
/// Relevance gate switch (default false); enabled by `with_relevance_gate`.
#[serde(default)]
pub relevance_gate: bool,
/// Optional prompt used by the relevance gate.
#[serde(skip_serializing_if = "Option::is_none")]
pub relevance_prompt: Option<String>,
/// URL prefilter switch (default false); enabled by `with_url_prefilter`.
#[serde(default)]
pub url_prefilter: bool,
/// Batch size for the URL prefilter (default 20).
#[serde(default = "default_url_prefilter_batch_size")]
pub url_prefilter_batch_size: usize,
/// Token limit for the URL prefilter (default 200).
#[serde(default = "default_url_prefilter_max_tokens")]
pub url_prefilter_max_tokens: u16,
}
impl Default for RemoteMultimodalConfig {
    /// Stock configuration. Fields are listed in struct-declaration order; all
    /// optional features (planning, synthesis, confidence retries, self-healing,
    /// relevance gate, URL prefilter, concurrent execution) start disabled.
    fn default() -> Self {
        const HTML_BUDGET_BYTES: usize = 24_000;
        const RESPONSE_TOKENS: u16 = 1024;
        const ROUNDS: usize = 6;
        const POST_PLAN_WAIT_MS: u64 = 350;

        Self {
            include_html: true,
            html_max_bytes: HTML_BUDGET_BYTES,
            include_url: true,
            include_title: true,
            include_screenshot: None,
            temperature: 0.1,
            max_tokens: RESPONSE_TOKENS,
            request_json_object: true,
            best_effort_json_extract: true,
            reasoning_effort: None,
            thinking_budget: None,
            max_skills_per_round: default_max_skills_per_round(),
            max_skill_context_chars: default_max_skill_context_chars(),
            max_rounds: ROUNDS,
            retry: RetryPolicy::default(),
            capture_profiles: Vec::new(),
            model_policy: ModelPolicy::default(),
            post_plan_wait_ms: POST_PLAN_WAIT_MS,
            max_inflight_requests: None,
            automation_timeout_ms: None,
            extra_ai_data: false,
            extraction_prompt: None,
            extraction_schema: None,
            screenshot: true,
            tool_calling_mode: super::tool_calling::ToolCallingMode::default(),
            html_diff_mode: super::html_diff::HtmlDiffMode::default(),
            planning_mode: None,
            synthesis_config: None,
            confidence_strategy: None,
            self_healing: None,
            concurrent_execution: false,
            relevance_gate: false,
            relevance_prompt: None,
            url_prefilter: false,
            url_prefilter_batch_size: default_url_prefilter_batch_size(),
            url_prefilter_max_tokens: default_url_prefilter_max_tokens(),
        }
    }
}
/// Default for `RemoteMultimodalConfig::url_prefilter_batch_size` (20), used by
/// both serde and the `Default` impl.
fn default_url_prefilter_batch_size() -> usize {
    const BATCH_SIZE: usize = 20;
    BATCH_SIZE
}
/// Default for `RemoteMultimodalConfig::url_prefilter_max_tokens` (200), used by
/// both serde and the `Default` impl.
fn default_url_prefilter_max_tokens() -> u16 {
    const MAX_TOKENS: u16 = 200;
    MAX_TOKENS
}
/// Default for `RemoteMultimodalConfig::max_skills_per_round` (3), used by both
/// serde and the `Default` impl.
fn default_max_skills_per_round() -> usize {
    const SKILLS_PER_ROUND: usize = 3;
    SKILLS_PER_ROUND
}
/// Default for `RemoteMultimodalConfig::max_skill_context_chars` (4000), used by
/// both serde and the `Default` impl.
fn default_max_skill_context_chars() -> usize {
    const CONTEXT_CHARS: usize = 4_000;
    CONTEXT_CHARS
}
impl RemoteMultimodalConfig {
    /// Same as [`RemoteMultimodalConfig::default`].
    pub fn new() -> Self {
        Default::default()
    }

    /// Default configuration with tool calling and HTML diffing set to `Auto`,
    /// the default confidence retry strategy, and concurrent execution enabled.
    pub fn fast() -> Self {
        let mut cfg = Self::default();
        cfg.tool_calling_mode = super::tool_calling::ToolCallingMode::Auto;
        cfg.html_diff_mode = super::html_diff::HtmlDiffMode::Auto;
        cfg.confidence_strategy = Some(super::confidence::ConfidenceRetryStrategy::default());
        cfg.concurrent_execution = true;
        cfg
    }

    /// [`RemoteMultimodalConfig::fast`] plus default planning and self-healing
    /// configurations.
    pub fn fast_with_planning() -> Self {
        let mut cfg = Self::fast();
        cfg.planning_mode = Some(super::planning::PlanningModeConfig::default());
        cfg.self_healing = Some(super::self_healing::SelfHealingConfig::default());
        cfg
    }

    /// True when extraction is enabled and at most one round is allowed.
    pub fn is_extraction_only(&self) -> bool {
        let single_round = matches!(self.max_rounds, 0 | 1);
        self.extra_ai_data && single_round
    }

    /// Sets `include_html`.
    pub fn with_html(self, include: bool) -> Self {
        Self {
            include_html: include,
            ..self
        }
    }

    /// Sets `html_max_bytes`.
    pub fn with_html_max_bytes(self, bytes: usize) -> Self {
        Self {
            html_max_bytes: bytes,
            ..self
        }
    }

    /// Sets `temperature`.
    pub fn with_temperature(self, temp: f32) -> Self {
        Self {
            temperature: temp,
            ..self
        }
    }

    /// Sets `max_tokens`.
    pub fn with_max_tokens(self, tokens: u16) -> Self {
        Self {
            max_tokens: tokens,
            ..self
        }
    }

    /// Sets (or clears, with `None`) `reasoning_effort`.
    pub fn with_reasoning_effort(self, effort: Option<ReasoningEffort>) -> Self {
        Self {
            reasoning_effort: effort,
            ..self
        }
    }

    /// Sets (or clears, with `None`) `thinking_budget`.
    pub fn with_thinking_budget(self, budget: Option<u32>) -> Self {
        Self {
            thinking_budget: budget,
            ..self
        }
    }

    /// Sets `max_rounds`.
    pub fn with_max_rounds(self, rounds: usize) -> Self {
        Self {
            max_rounds: rounds,
            ..self
        }
    }

    /// Replaces the retry policy.
    pub fn with_retry(self, retry: RetryPolicy) -> Self {
        Self { retry, ..self }
    }

    /// Replaces the model policy.
    pub fn with_model_policy(self, policy: ModelPolicy) -> Self {
        Self {
            model_policy: policy,
            ..self
        }
    }

    /// Sets `extra_ai_data`.
    pub fn with_extraction(self, enabled: bool) -> Self {
        Self {
            extra_ai_data: enabled,
            ..self
        }
    }

    /// Stores `prompt` as the extraction prompt.
    pub fn with_extraction_prompt(self, prompt: impl Into<String>) -> Self {
        Self {
            extraction_prompt: Some(prompt.into()),
            ..self
        }
    }

    /// Stores `schema` as the extraction schema.
    pub fn with_extraction_schema(self, schema: super::ExtractionSchema) -> Self {
        Self {
            extraction_schema: Some(schema),
            ..self
        }
    }

    /// Sets `screenshot`.
    pub fn with_screenshot(self, enabled: bool) -> Self {
        Self {
            screenshot: enabled,
            ..self
        }
    }

    /// Sets (or clears, with `None`) `include_screenshot`.
    pub fn with_include_screenshot(self, include: Option<bool>) -> Self {
        Self {
            include_screenshot: include,
            ..self
        }
    }

    /// Appends `profile` to `capture_profiles` in place (unlike the `with_*`
    /// methods, this one mutates rather than consumes).
    pub fn add_capture_profile(&mut self, profile: CaptureProfile) {
        let profiles = &mut self.capture_profiles;
        profiles.push(profile);
    }

    /// Sets `tool_calling_mode`.
    pub fn with_tool_calling_mode(self, mode: super::tool_calling::ToolCallingMode) -> Self {
        Self {
            tool_calling_mode: mode,
            ..self
        }
    }

    /// Sets `html_diff_mode`.
    pub fn with_html_diff_mode(self, mode: super::html_diff::HtmlDiffMode) -> Self {
        Self {
            html_diff_mode: mode,
            ..self
        }
    }

    /// Stores `config` as the planning configuration.
    pub fn with_planning_mode(self, config: super::planning::PlanningModeConfig) -> Self {
        Self {
            planning_mode: Some(config),
            ..self
        }
    }

    /// Stores `config` as the synthesis configuration.
    pub fn with_synthesis_config(self, config: super::synthesis::SynthesisConfig) -> Self {
        Self {
            synthesis_config: Some(config),
            ..self
        }
    }

    /// Stores `strategy` as the confidence retry strategy.
    pub fn with_confidence_strategy(
        self,
        strategy: super::confidence::ConfidenceRetryStrategy,
    ) -> Self {
        Self {
            confidence_strategy: Some(strategy),
            ..self
        }
    }

    /// Stores `config` as the self-healing configuration.
    pub fn with_self_healing(self, config: super::self_healing::SelfHealingConfig) -> Self {
        Self {
            self_healing: Some(config),
            ..self
        }
    }

    /// Sets `concurrent_execution`.
    pub fn with_concurrent_execution(self, enabled: bool) -> Self {
        Self {
            concurrent_execution: enabled,
            ..self
        }
    }

    /// Enables the relevance gate and replaces `relevance_prompt` with `prompt`
    /// (passing `None` clears any previously stored prompt).
    pub fn with_relevance_gate(self, prompt: Option<String>) -> Self {
        Self {
            relevance_gate: true,
            relevance_prompt: prompt,
            ..self
        }
    }

    /// Enables the URL prefilter; a `Some` batch size replaces the current one,
    /// `None` keeps it.
    pub fn with_url_prefilter(self, batch_size: Option<usize>) -> Self {
        let url_prefilter_batch_size = batch_size.unwrap_or(self.url_prefilter_batch_size);
        Self {
            url_prefilter: true,
            url_prefilter_batch_size,
            ..self
        }
    }
}
pub use llm_models_spider::{
arena_rank, model_profile, supports_pdf, supports_video, supports_vision, ModelCapabilities,
ModelInfoEntry, ModelPricing, ModelProfile, ModelRanks, MODEL_INFO,
};
pub fn merged_config(
base: &RemoteMultimodalConfig,
override_cfg: &RemoteMultimodalConfig,
) -> RemoteMultimodalConfig {
let mut out = base.clone();
out.include_html = override_cfg.include_html;
out.html_max_bytes = override_cfg.html_max_bytes;
out.include_url = override_cfg.include_url;
out.include_title = override_cfg.include_title;
out.include_screenshot = override_cfg.include_screenshot;
out.temperature = override_cfg.temperature;
out.max_tokens = override_cfg.max_tokens;
out.request_json_object = override_cfg.request_json_object;
out.best_effort_json_extract = override_cfg.best_effort_json_extract;
out.reasoning_effort = override_cfg.reasoning_effort;
out.thinking_budget = override_cfg.thinking_budget;
out.max_rounds = override_cfg.max_rounds;
out.post_plan_wait_ms = override_cfg.post_plan_wait_ms;
out.retry = override_cfg.retry.clone();
out.model_policy = override_cfg.model_policy.clone();
if !override_cfg.capture_profiles.is_empty() {
out.capture_profiles = override_cfg.capture_profiles.clone();
}
out.extra_ai_data = override_cfg.extra_ai_data;
out.extraction_prompt = override_cfg.extraction_prompt.clone();
out.extraction_schema = override_cfg.extraction_schema.clone();
out.screenshot = override_cfg.screenshot;
out.relevance_gate = override_cfg.relevance_gate;
out.relevance_prompt = override_cfg.relevance_prompt.clone();
out.url_prefilter = override_cfg.url_prefilter;
out.url_prefilter_batch_size = override_cfg.url_prefilter_batch_size;
out.url_prefilter_max_tokens = override_cfg.url_prefilter_max_tokens;
out
}
/// Builds the JSON object `{"effort": "low" | "medium" | "high"}` from
/// `cfg.reasoning_effort`, or returns `None` when no effort is configured.
pub fn reasoning_payload(cfg: &RemoteMultimodalConfig) -> Option<serde_json::Value> {
    let label = match cfg.reasoning_effort? {
        ReasoningEffort::Low => "low",
        ReasoningEffort::Medium => "medium",
        ReasoningEffort::High => "high",
    };
    Some(serde_json::json!({ "effort": label }))
}
/// Builds `{"type": "enabled", "budget_tokens": <budget>}` from an explicitly
/// configured `cfg.thinking_budget`, or returns `None` when it is unset.
///
/// `reasoning_effort` is not consulted here; see `effective_thinking_payload`
/// for the variant that falls back to it.
#[inline]
pub fn thinking_payload(cfg: &RemoteMultimodalConfig) -> Option<serde_json::Value> {
    let budget_tokens = cfg.thinking_budget?;
    Some(serde_json::json!({
        "type": "enabled",
        "budget_tokens": budget_tokens
    }))
}
/// Returns `true` when `url` contains the substring `api.anthropic.com`.
///
/// This is a plain substring test, not a parsed-host comparison, so the text
/// matches wherever it appears in the URL.
#[inline]
pub fn is_anthropic_endpoint(url: &str) -> bool {
    const ANTHROPIC_HOST: &str = "api.anthropic.com";
    url.find(ANTHROPIC_HOST).is_some()
}
/// Resolves the thinking budget in tokens.
///
/// An explicit `cfg.thinking_budget` wins. Otherwise `cfg.reasoning_effort` is
/// mapped to a budget (`Low` 4096, `Medium` 8192, `High` 16384). Returns `None`
/// when neither is set.
#[inline]
pub fn effective_thinking_budget(cfg: &RemoteMultimodalConfig) -> Option<u32> {
    cfg.thinking_budget.or_else(|| {
        let effort = cfg.reasoning_effort?;
        let tokens = match effort {
            ReasoningEffort::Low => 4096,
            ReasoningEffort::Medium => 8192,
            ReasoningEffort::High => 16384,
        };
        Some(tokens)
    })
}
/// Builds `{"type": "enabled", "budget_tokens": <budget>}` using the budget
/// resolved by `effective_thinking_budget`, or returns `None` when that
/// resolves to nothing.
#[inline]
pub fn effective_thinking_payload(cfg: &RemoteMultimodalConfig) -> Option<serde_json::Value> {
    let budget_tokens = effective_thinking_budget(cfg)?;
    Some(serde_json::json!({
        "type": "enabled",
        "budget_tokens": budget_tokens
    }))
}
/// Asks `gate` whether `url` is allowed; with no gate, every URL is allowed.
pub fn is_url_allowed(gate: Option<&crate::PromptUrlGate>, url: &str) -> bool {
    gate.map_or(true, |active_gate| active_gate.is_allowed(url))
}
// Unit tests for the configuration types and helper functions in this file.
// The `supports_vision` tests exercise the function re-exported from
// `llm_models_spider`, so their expectations depend on that crate's model table.
#[cfg(test)]
mod tests {
use super::*;
// Builder chain on AutomationConfig and substring matching of success URLs.
#[test]
fn test_automation_config() {
let config = AutomationConfig::new("Login to dashboard")
.with_max_steps(10)
.with_timeout(60_000)
.with_success_url("/dashboard")
.with_extraction("Extract user info");
assert_eq!(config.goal, "Login to dashboard");
assert_eq!(config.max_steps, 10);
assert!(config.extract_on_success);
assert!(config.is_success_url("https://example.com/dashboard"));
}
// RetryPolicy::new + with_backoff, and the Duration conversion.
#[test]
fn test_retry_policy() {
let policy = RetryPolicy::new(5).with_backoff(2000);
assert_eq!(policy.max_attempts, 5);
assert_eq!(policy.backoff_ms, 2000);
assert_eq!(policy.backoff_duration(), Duration::from_millis(2000));
}
// Default tier-to-model mapping.
#[test]
fn test_model_policy() {
let policy = ModelPolicy::default();
assert_eq!(policy.model_for_tier(CostTier::Low), "gpt-4o-mini");
assert_eq!(policy.model_for_tier(CostTier::High), "gpt-4o");
}
// Pins the documented defaults of RemoteMultimodalConfig.
#[test]
fn test_remote_multimodal_config_defaults() {
let cfg = RemoteMultimodalConfig::default();
assert!(cfg.include_html);
assert_eq!(cfg.html_max_bytes, 24_000);
assert!(cfg.include_url);
assert!(cfg.include_title);
assert_eq!(cfg.temperature, 0.1);
assert_eq!(cfg.max_tokens, 1024);
assert!(cfg.request_json_object);
assert!(cfg.best_effort_json_extract);
assert!(cfg.reasoning_effort.is_none());
assert_eq!(cfg.max_rounds, 6);
assert!(cfg.screenshot);
assert!(!cfg.extra_ai_data);
}
// Each with_* builder writes the field it is named after.
#[test]
fn test_remote_multimodal_config_builder() {
let cfg = RemoteMultimodalConfig::new()
.with_html(false)
.with_temperature(0.5)
.with_reasoning_effort(Some(ReasoningEffort::High))
.with_max_rounds(10)
.with_extraction(true)
.with_extraction_prompt("Extract products");
assert!(!cfg.include_html);
assert_eq!(cfg.temperature, 0.5);
assert_eq!(cfg.reasoning_effort, Some(ReasoningEffort::High));
assert_eq!(cfg.max_rounds, 10);
assert!(cfg.extra_ai_data);
assert_eq!(cfg.extraction_prompt, Some("Extract products".to_string()));
}
// reasoning_payload: None without an effort, otherwise {"effort": "<level>"}.
#[test]
fn test_reasoning_payload_helper() {
let cfg = RemoteMultimodalConfig::default();
assert!(reasoning_payload(&cfg).is_none());
let cfg =
RemoteMultimodalConfig::default().with_reasoning_effort(Some(ReasoningEffort::Low));
assert_eq!(
reasoning_payload(&cfg),
Some(serde_json::json!({ "effort": "low" }))
);
let cfg =
RemoteMultimodalConfig::default().with_reasoning_effort(Some(ReasoningEffort::Medium));
assert_eq!(
reasoning_payload(&cfg),
Some(serde_json::json!({ "effort": "medium" }))
);
let cfg =
RemoteMultimodalConfig::default().with_reasoning_effort(Some(ReasoningEffort::High));
assert_eq!(
reasoning_payload(&cfg),
Some(serde_json::json!({ "effort": "high" }))
);
}
// The override's reasoning_effort wins in merged_config.
#[test]
fn test_merged_config_includes_reasoning_effort() {
let base =
RemoteMultimodalConfig::default().with_reasoning_effort(Some(ReasoningEffort::Low));
let override_cfg =
RemoteMultimodalConfig::default().with_reasoning_effort(Some(ReasoningEffort::High));
let merged = merged_config(&base, &override_cfg);
assert_eq!(merged.reasoning_effort, Some(ReasoningEffort::High));
}
// thinking_payload: None without a budget, otherwise the enabled/budget object.
#[test]
fn test_thinking_payload() {
let cfg = RemoteMultimodalConfig::default();
assert!(thinking_payload(&cfg).is_none());
let cfg = RemoteMultimodalConfig::default().with_thinking_budget(Some(10000));
assert_eq!(
thinking_payload(&cfg),
Some(serde_json::json!({"type": "enabled", "budget_tokens": 10000}))
);
}
// Substring detection of the Anthropic API host.
#[test]
fn test_is_anthropic_endpoint() {
assert!(is_anthropic_endpoint(
"https://api.anthropic.com/v1/messages"
));
assert!(!is_anthropic_endpoint(
"https://api.openai.com/v1/chat/completions"
));
assert!(!is_anthropic_endpoint(
"https://openrouter.ai/api/v1/chat/completions"
));
}
// The override's thinking_budget wins in merged_config.
#[test]
fn test_merged_config_includes_thinking_budget() {
let base = RemoteMultimodalConfig::default().with_thinking_budget(Some(5000));
let override_cfg = RemoteMultimodalConfig::default().with_thinking_budget(Some(15000));
let merged = merged_config(&base, &override_cfg);
assert_eq!(merged.thinking_budget, Some(15000));
}
// Profile selection: very heavy SVG selects Slim; a small text-rich page selects Minimal.
#[test]
fn test_html_cleaning_profile_analysis() {
use super::ContentAnalysis;
let analysis = ContentAnalysis {
svg_bytes: 150_000, ..Default::default()
};
assert_eq!(
HtmlCleaningProfile::from_content_analysis(&analysis),
HtmlCleaningProfile::Slim
);
let analysis = ContentAnalysis {
html_length: 5_000,
text_ratio: 0.3,
..Default::default()
};
assert_eq!(
HtmlCleaningProfile::from_content_analysis(&analysis),
HtmlCleaningProfile::Minimal
);
}
// Savings estimates for Raw (0), Minimal (script + style) and Slim (cleanable bytes).
#[test]
fn test_html_cleaning_profile_estimate_savings() {
use super::ContentAnalysis;
let analysis = ContentAnalysis {
script_bytes: 10_000,
style_bytes: 5_000,
cleanable_bytes: 20_000,
html_length: 50_000,
..Default::default()
};
assert_eq!(HtmlCleaningProfile::Raw.estimate_savings(&analysis), 0);
assert_eq!(
HtmlCleaningProfile::Minimal.estimate_savings(&analysis),
15_000
);
assert_eq!(
HtmlCleaningProfile::Slim.estimate_savings(&analysis),
20_000
);
}
// supports_vision expectations for OpenAI model names.
#[test]
fn test_supports_vision_openai() {
assert!(supports_vision("gpt-4o"));
assert!(supports_vision("gpt-4o-mini"));
assert!(supports_vision("gpt-4-turbo"));
assert!(supports_vision("o1"));
assert!(supports_vision("o3"));
assert!(!supports_vision("gpt-3.5-turbo"));
}
// supports_vision expectations for Anthropic model names.
#[test]
fn test_supports_vision_anthropic() {
assert!(supports_vision("claude-3-sonnet-20240229"));
assert!(supports_vision("claude-3-opus-20240229"));
assert!(supports_vision("claude-3-haiku-20240307"));
assert!(supports_vision("claude-3-5-sonnet-20241022"));
assert!(!supports_vision("claude-2"));
assert!(!supports_vision("claude-2.1"));
assert!(!supports_vision("claude-instant-1.2"));
}
// supports_vision expectations for Qwen model names.
#[test]
fn test_supports_vision_qwen() {
assert!(supports_vision("qwen2-vl-72b"));
assert!(supports_vision("qwen2.5-vl-7b"));
assert!(supports_vision("qwen-vl-max"));
assert!(supports_vision("qwq-32b"));
assert!(!supports_vision("qwen2-72b"));
assert!(!supports_vision("qwen2.5-7b"));
}
// supports_vision expectations for Gemini model names.
#[test]
fn test_supports_vision_gemini() {
assert!(supports_vision("gemini-1.5-pro"));
assert!(supports_vision("gemini-1.5-flash"));
assert!(supports_vision("gemini-2.0-flash"));
assert!(supports_vision("gemini-pro-vision"));
}
// supports_vision expectations for assorted other model names.
#[test]
fn test_supports_vision_other() {
assert!(supports_vision("pixtral-12b"));
assert!(supports_vision("llama-3.2-11b-vision-instruct"));
assert!(supports_vision("molmo-2-8b"));
assert!(!supports_vision("llama-3-70b-instruct"));
assert!(!supports_vision("mistral-7b-instruct"));
assert!(!supports_vision("mixtral-8x7b-instruct"));
}
// supports_vision is expected to ignore letter case.
#[test]
fn test_supports_vision_case_insensitive() {
assert!(supports_vision("GPT-4O"));
assert!(supports_vision("Claude-3-Sonnet"));
assert!(supports_vision("QWEN2-VL"));
}
// is_extraction_only requires extraction enabled AND max_rounds <= 1 (0 included).
#[test]
fn test_is_extraction_only() {
let cfg = RemoteMultimodalConfig::default();
assert!(!cfg.is_extraction_only());
let cfg = RemoteMultimodalConfig::new()
.with_extraction(true)
.with_max_rounds(6);
assert!(!cfg.is_extraction_only());
let cfg = RemoteMultimodalConfig::new().with_max_rounds(1);
assert!(!cfg.is_extraction_only());
let cfg = RemoteMultimodalConfig::new()
.with_extraction(true)
.with_max_rounds(1);
assert!(cfg.is_extraction_only());
let cfg = RemoteMultimodalConfig::new()
.with_extraction(true)
.with_max_rounds(0);
assert!(cfg.is_extraction_only());
}
// ModelEndpoint::new leaves both overrides unset.
#[test]
fn test_model_endpoint_new() {
let ep = ModelEndpoint::new("gpt-4o-mini");
assert_eq!(ep.model_name, "gpt-4o-mini");
assert!(ep.api_url.is_none());
assert!(ep.api_key.is_none());
}
// The with_api_url / with_api_key builders store their arguments.
#[test]
fn test_model_endpoint_with_overrides() {
let ep = ModelEndpoint::new("gpt-4o")
.with_api_url("https://api.openai.com/v1/chat/completions")
.with_api_key("sk-test");
assert_eq!(ep.model_name, "gpt-4o");
assert_eq!(
ep.api_url.as_deref(),
Some("https://api.openai.com/v1/chat/completions")
);
assert_eq!(ep.api_key.as_deref(), Some("sk-test"));
}
// A fully populated ModelEndpoint survives a JSON round trip.
#[test]
fn test_model_endpoint_serde_roundtrip() {
let ep = ModelEndpoint::new("gpt-4o")
.with_api_url("https://api.openai.com/v1/chat/completions")
.with_api_key("sk-test");
let json = serde_json::to_string(&ep).unwrap();
let deserialized: ModelEndpoint = serde_json::from_str(&json).unwrap();
assert_eq!(deserialized.model_name, "gpt-4o");
assert_eq!(
deserialized.api_url.as_deref(),
Some("https://api.openai.com/v1/chat/completions")
);
assert_eq!(deserialized.api_key.as_deref(), Some("sk-test"));
}
// JSON carrying only model_name deserializes with both overrides as None.
#[test]
fn test_model_endpoint_serde_minimal() {
let json = r#"{"model_name":"gpt-4o-mini"}"#;
let ep: ModelEndpoint = serde_json::from_str(json).unwrap();
assert_eq!(ep.model_name, "gpt-4o-mini");
assert!(ep.api_url.is_none());
assert!(ep.api_key.is_none());
}
// VisionRouteMode variants survive a JSON round trip.
#[test]
fn test_vision_route_mode_serde() {
let mode = VisionRouteMode::TextFirst;
let json = serde_json::to_string(&mode).unwrap();
let deserialized: VisionRouteMode = serde_json::from_str(&json).unwrap();
assert_eq!(deserialized, VisionRouteMode::TextFirst);
let mode = VisionRouteMode::VisionFirst;
let json = serde_json::to_string(&mode).unwrap();
let deserialized: VisionRouteMode = serde_json::from_str(&json).unwrap();
assert_eq!(deserialized, VisionRouteMode::VisionFirst);
}
}