#![allow(dead_code)]
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum WebOperationType {
HttpGet {
url: String,
headers: Option<HashMap<String, String>>,
},
HttpPost {
url: String,
headers: Option<HashMap<String, String>>,
body: String,
},
WebScrape {
url: String,
selector: String,
wait_for_element: Option<String>,
},
Screenshot {
url: String,
width: Option<u32>,
height: Option<u32>,
full_page: Option<bool>,
},
PdfGeneration {
url: String,
page_size: Option<String>,
},
FormSubmit {
url: String,
form_data: HashMap<String, String>,
},
ApiInteraction {
base_url: String,
endpoints: Vec<String>,
rate_limit_ms: Option<u64>,
},
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebOperationRequest {
pub operation: WebOperationType,
pub vm_id: Option<String>,
pub timeout_ms: Option<u64>,
pub user_agent: Option<String>,
pub proxy: Option<ProxyConfig>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProxyConfig {
pub host: String,
pub port: u16,
pub username: Option<String>,
pub password: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebOperationResult {
pub operation_id: String,
pub vm_id: String,
pub operation_type: String,
pub status: OperationStatus,
pub result_data: WebResultData,
pub metadata: OperationMetadata,
pub error: Option<String>,
pub started_at: String,
pub completed_at: String,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum OperationStatus {
Pending,
Running,
Completed,
Failed,
Timeout,
Cancelled,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum WebResultData {
#[serde(rename = "http_response")]
HttpResponse {
status_code: u16,
status_text: String,
headers: HashMap<String, String>,
body: String,
content_type: String,
content_length: u64,
},
#[serde(rename = "scraped_content")]
ScrapedContent {
elements: Vec<ScrapedElement>,
page_title: String,
page_url: String,
scrape_duration_ms: u64,
},
#[serde(rename = "screenshot")]
Screenshot {
image_data: String, format: String,
width: u32,
height: u32,
file_size_bytes: u64,
},
#[serde(rename = "pdf_data")]
PdfData {
pdf_data: String, filename: String,
page_count: u32,
file_size_bytes: u64,
},
#[serde(rename = "form_result")]
FormResult {
response_status: u16,
response_headers: HashMap<String, String>,
response_body: String,
submitted_fields: Vec<String>,
},
#[serde(rename = "api_results")]
ApiResults {
results: Vec<ApiEndpointResult>,
total_requests: u32,
successful_requests: u32,
failed_requests: u32,
total_duration_ms: u64,
},
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScrapedElement {
pub selector: String,
pub content: String,
pub html: String,
pub attributes: HashMap<String, String>,
pub position: ElementPosition,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ElementPosition {
pub x: f64,
pub y: f64,
pub width: f64,
pub height: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiEndpointResult {
pub endpoint: String,
pub method: String,
pub status_code: u16,
pub response_time_ms: u64,
pub response_body: String,
pub success: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OperationMetadata {
pub vm_id: String,
pub user_id: Option<String>,
pub session_id: Option<String>,
pub execution_duration_ms: u64,
pub network_requests_count: u32,
pub data_transferred_bytes: u64,
pub security_context: SecurityContext,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecurityContext {
pub allowed_domains: Vec<String>,
pub blocked_domains: Vec<String>,
pub max_response_size_mb: u32,
pub max_execution_time_ms: u64,
pub allow_javascript: bool,
pub allow_cookies: bool,
pub sandbox_enabled: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebOperationConfig {
pub default_timeout_ms: u64,
pub max_concurrent_operations: u32,
pub security: SecurityContext,
pub browser_settings: BrowserSettings,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BrowserSettings {
pub user_agent: String,
pub viewport_width: u32,
pub viewport_height: u32,
pub enable_javascript: bool,
pub enable_images: bool,
pub enable_css: bool,
pub timezone: String,
}
impl Default for WebOperationConfig {
fn default() -> Self {
Self {
default_timeout_ms: 30000, max_concurrent_operations: 5,
security: SecurityContext {
allowed_domains: vec![
"https://httpbin.org".to_string(),
"https://jsonplaceholder.typicode.com".to_string(),
"https://api.github.com".to_string(),
],
blocked_domains: vec!["malware.example.com".to_string()],
max_response_size_mb: 10,
max_execution_time_ms: 60000, allow_javascript: true,
allow_cookies: false,
sandbox_enabled: true,
},
browser_settings: BrowserSettings {
user_agent: "Terraphim-TUI-WebBot/1.0".to_string(),
viewport_width: 1920,
viewport_height: 1080,
enable_javascript: true,
enable_images: true,
enable_css: true,
timezone: "UTC".to_string(),
},
}
}
}
pub struct WebOperationBuilder {
operation: WebOperationType,
vm_id: Option<String>,
timeout_ms: Option<u64>,
user_agent: Option<String>,
proxy: Option<ProxyConfig>,
}
impl WebOperationBuilder {
pub fn new(operation: WebOperationType) -> Self {
Self {
operation,
vm_id: None,
timeout_ms: None,
user_agent: None,
proxy: None,
}
}
pub fn vm_id(mut self, vm_id: impl Into<String>) -> Self {
self.vm_id = Some(vm_id.into());
self
}
pub fn timeout_ms(mut self, timeout_ms: u64) -> Self {
self.timeout_ms = Some(timeout_ms);
self
}
pub fn user_agent(mut self, user_agent: impl Into<String>) -> Self {
self.user_agent = Some(user_agent.into());
self
}
pub fn proxy(mut self, proxy: ProxyConfig) -> Self {
self.proxy = Some(proxy);
self
}
pub fn build(self) -> WebOperationRequest {
WebOperationRequest {
operation: self.operation,
vm_id: self.vm_id,
timeout_ms: self.timeout_ms,
user_agent: self.user_agent,
proxy: self.proxy,
}
}
}
impl WebOperationType {
pub fn http_get(url: impl Into<String>) -> Self {
Self::HttpGet {
url: url.into(),
headers: None,
}
}
pub fn http_get_with_headers(url: impl Into<String>, headers: HashMap<String, String>) -> Self {
Self::HttpGet {
url: url.into(),
headers: Some(headers),
}
}
pub fn http_post(url: impl Into<String>, body: impl Into<String>) -> Self {
Self::HttpPost {
url: url.into(),
headers: None,
body: body.into(),
}
}
pub fn http_post_with_headers(
url: impl Into<String>,
headers: HashMap<String, String>,
body: impl Into<String>,
) -> Self {
Self::HttpPost {
url: url.into(),
headers: Some(headers),
body: body.into(),
}
}
pub fn scrape(url: impl Into<String>, selector: impl Into<String>) -> Self {
Self::WebScrape {
url: url.into(),
selector: selector.into(),
wait_for_element: None,
}
}
pub fn scrape_with_wait(
url: impl Into<String>,
selector: impl Into<String>,
wait_for_element: impl Into<String>,
) -> Self {
Self::WebScrape {
url: url.into(),
selector: selector.into(),
wait_for_element: Some(wait_for_element.into()),
}
}
pub fn screenshot(url: impl Into<String>) -> Self {
Self::Screenshot {
url: url.into(),
width: None,
height: None,
full_page: None,
}
}
pub fn screenshot_with_dimensions(url: impl Into<String>, width: u32, height: u32) -> Self {
Self::Screenshot {
url: url.into(),
width: Some(width),
height: Some(height),
full_page: None,
}
}
pub fn full_page_screenshot(url: impl Into<String>) -> Self {
Self::Screenshot {
url: url.into(),
width: None,
height: None,
full_page: Some(true),
}
}
pub fn generate_pdf(url: impl Into<String>) -> Self {
Self::PdfGeneration {
url: url.into(),
page_size: None,
}
}
pub fn generate_pdf_with_page_size(
url: impl Into<String>,
page_size: impl Into<String>,
) -> Self {
Self::PdfGeneration {
url: url.into(),
page_size: Some(page_size.into()),
}
}
pub fn submit_form(url: impl Into<String>, form_data: HashMap<String, String>) -> Self {
Self::FormSubmit {
url: url.into(),
form_data,
}
}
pub fn api_interaction(base_url: impl Into<String>, endpoints: Vec<impl Into<String>>) -> Self {
Self::ApiInteraction {
base_url: base_url.into(),
endpoints: endpoints.into_iter().map(|e| e.into()).collect(),
rate_limit_ms: None,
}
}
pub fn api_interaction_with_rate_limit(
base_url: impl Into<String>,
endpoints: Vec<impl Into<String>>,
rate_limit_ms: u64,
) -> Self {
Self::ApiInteraction {
base_url: base_url.into(),
endpoints: endpoints.into_iter().map(|e| e.into()).collect(),
rate_limit_ms: Some(rate_limit_ms),
}
}
}
pub mod utils {
use super::*;
pub fn default_headers() -> HashMap<String, String> {
let mut headers = HashMap::new();
headers.insert("Accept".to_string(), "application/json".to_string());
headers.insert(
"User-Agent".to_string(),
"Terraphim-TUI-WebBot/1.0".to_string(),
);
headers.insert("Accept-Language".to_string(), "en-US,en;q=0.9".to_string());
headers
}
pub fn form_headers() -> HashMap<String, String> {
let mut headers = default_headers();
headers.insert(
"Content-Type".to_string(),
"application/x-www-form-urlencoded".to_string(),
);
headers
}
pub fn json_headers() -> HashMap<String, String> {
let mut headers = default_headers();
headers.insert("Content-Type".to_string(), "application/json".to_string());
headers
}
pub fn validate_url(url: &str) -> Result<()> {
if url.is_empty() {
anyhow::bail!("URL cannot be empty");
}
if !url.starts_with("http://") && !url.starts_with("https://") {
anyhow::bail!("URL must start with http:// or https://");
}
let url_parser = reqwest::Url::parse(url);
if url_parser.is_err() {
anyhow::bail!("Invalid URL format: {}", url);
}
Ok(())
}
pub fn extract_domain(url: &str) -> Result<String> {
let parsed_url =
reqwest::Url::parse(url).map_err(|e| anyhow::anyhow!("Failed to parse URL: {}", e))?;
Ok(parsed_url
.host_str()
.ok_or_else(|| anyhow::anyhow!("URL has no host"))?
.to_string())
}
pub fn generate_operation_id() -> String {
use std::time::{SystemTime, UNIX_EPOCH};
let timestamp = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_millis();
format!("webop-{}", timestamp)
}
pub fn estimate_complexity(operation: &WebOperationType) -> OperationComplexity {
match operation {
WebOperationType::HttpGet { .. } => OperationComplexity::Low,
WebOperationType::HttpPost { .. } => OperationComplexity::Medium,
WebOperationType::WebScrape { .. } => OperationComplexity::High,
WebOperationType::Screenshot { .. } => OperationComplexity::High,
WebOperationType::PdfGeneration { .. } => OperationComplexity::High,
WebOperationType::FormSubmit { .. } => OperationComplexity::Medium,
WebOperationType::ApiInteraction { endpoints, .. } => {
if endpoints.len() > 5 {
OperationComplexity::High
} else if endpoints.len() > 2 {
OperationComplexity::Medium
} else {
OperationComplexity::Low
}
}
}
}
}
#[derive(Debug, Clone, PartialEq)]
pub enum OperationComplexity {
Low,
Medium,
High,
}
impl OperationComplexity {
pub fn recommended_timeout_ms(&self) -> u64 {
match self {
OperationComplexity::Low => 10000, OperationComplexity::Medium => 30000, OperationComplexity::High => 60000, }
}
pub fn recommended_retries(&self) -> u32 {
match self {
OperationComplexity::Low => 2,
OperationComplexity::Medium => 3,
OperationComplexity::High => 5,
}
}
}