// harness_webfetch/types.rs

1use harness_core::{PermissionPolicy, ToolError};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use std::sync::{Arc, Mutex};
5
6use crate::engine::WebFetchEngine;
7
8#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
9#[serde(rename_all = "UPPERCASE")]
10pub enum WebFetchMethod {
11    Get,
12    Post,
13}
14
15impl WebFetchMethod {
16    pub fn as_str(&self) -> &'static str {
17        match self {
18            Self::Get => "GET",
19            Self::Post => "POST",
20        }
21    }
22}
23
24#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
25#[serde(rename_all = "snake_case")]
26pub enum WebFetchExtract {
27    Markdown,
28    Raw,
29    Both,
30}
31
32impl WebFetchExtract {
33    pub fn as_str(&self) -> &'static str {
34        match self {
35            Self::Markdown => "markdown",
36            Self::Raw => "raw",
37            Self::Both => "both",
38        }
39    }
40}
41
42/// Session permission policy plus the autonomous escape hatch for tests.
43#[derive(Clone)]
44pub struct WebFetchPermissionPolicy {
45    pub inner: PermissionPolicy,
46    pub unsafe_allow_fetch_without_hook: bool,
47}
48
49impl WebFetchPermissionPolicy {
50    pub fn new(inner: PermissionPolicy) -> Self {
51        Self {
52            inner,
53            unsafe_allow_fetch_without_hook: false,
54        }
55    }
56
57    pub fn with_unsafe_bypass(mut self, v: bool) -> Self {
58        self.unsafe_allow_fetch_without_hook = v;
59        self
60    }
61}
62
63impl std::fmt::Debug for WebFetchPermissionPolicy {
64    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65        f.debug_struct("WebFetchPermissionPolicy")
66            .field(
67                "unsafe_allow_fetch_without_hook",
68                &self.unsafe_allow_fetch_without_hook,
69            )
70            .field("inner", &self.inner)
71            .finish()
72    }
73}
74
75pub type WebFetchCache = Arc<Mutex<HashMap<String, CachedResponse>>>;
76
77#[derive(Clone)]
78pub struct WebFetchSessionConfig {
79    pub permissions: WebFetchPermissionPolicy,
80    pub engine: Arc<dyn WebFetchEngine>,
81    pub default_headers: Option<HashMap<String, String>>,
82    pub allow_loopback: bool,
83    pub allow_private_networks: bool,
84    pub allow_metadata: bool,
85    pub resolve_once: bool,
86    pub default_timeout_ms: Option<u64>,
87    pub session_backstop_ms: Option<u64>,
88    pub max_redirects: Option<u32>,
89    pub inline_markdown_cap: Option<usize>,
90    pub inline_raw_cap: Option<usize>,
91    pub spill_hard_cap: Option<usize>,
92    pub cache_ttl_ms: Option<u64>,
93    pub spill_dir: Option<String>,
94    pub session_id: Option<String>,
95    pub cache: Option<WebFetchCache>,
96}
97
98impl WebFetchSessionConfig {
99    pub fn new(
100        permissions: WebFetchPermissionPolicy,
101        engine: Arc<dyn WebFetchEngine>,
102    ) -> Self {
103        Self {
104            permissions,
105            engine,
106            default_headers: None,
107            allow_loopback: false,
108            allow_private_networks: false,
109            allow_metadata: false,
110            resolve_once: true,
111            default_timeout_ms: None,
112            session_backstop_ms: None,
113            max_redirects: None,
114            inline_markdown_cap: None,
115            inline_raw_cap: None,
116            spill_hard_cap: None,
117            cache_ttl_ms: None,
118            spill_dir: None,
119            session_id: None,
120            cache: None,
121        }
122    }
123
124    pub fn with_cache(mut self) -> Self {
125        self.cache = Some(Arc::new(Mutex::new(HashMap::new())));
126        self
127    }
128}
129
130impl std::fmt::Debug for WebFetchSessionConfig {
131    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
132        f.debug_struct("WebFetchSessionConfig")
133            .field("permissions", &self.permissions)
134            .field("allow_loopback", &self.allow_loopback)
135            .field("allow_private_networks", &self.allow_private_networks)
136            .field("allow_metadata", &self.allow_metadata)
137            .field("has_cache", &self.cache.is_some())
138            .finish()
139    }
140}
141
142#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct CachedResponse {
144    pub at_ms: u64,
145    pub status: u16,
146    pub final_url: String,
147    pub redirect_chain: Vec<String>,
148    pub content_type: String,
149    pub body: Vec<u8>,
150    pub extract: WebFetchExtract,
151    #[serde(default, skip_serializing_if = "Option::is_none")]
152    pub extracted_markdown: Option<String>,
153}
154
155#[derive(Debug, Clone, Serialize, Deserialize)]
156pub struct FetchMetadata {
157    pub url: String,
158    pub final_url: String,
159    pub method: WebFetchMethod,
160    pub status: u16,
161    pub content_type: String,
162    pub redirect_chain: Vec<String>,
163    pub fetched_ms: u64,
164    pub from_cache: bool,
165    #[serde(default, skip_serializing_if = "Option::is_none")]
166    pub cache_age_sec: Option<u64>,
167}
168
169#[derive(Debug, Clone, Serialize, Deserialize)]
170pub struct WebFetchOk {
171    pub output: String,
172    pub meta: FetchMetadata,
173    #[serde(default, skip_serializing_if = "Option::is_none")]
174    pub body_markdown: Option<String>,
175    #[serde(default, skip_serializing_if = "Option::is_none")]
176    pub body_raw: Option<String>,
177    #[serde(default, skip_serializing_if = "Option::is_none")]
178    pub log_path: Option<String>,
179    pub byte_cap: bool,
180}
181
182#[derive(Debug, Clone, Serialize, Deserialize)]
183pub struct WebFetchRedirectLoop {
184    pub output: String,
185    pub meta: FetchMetadata,
186}
187
188#[derive(Debug, Clone, Serialize, Deserialize)]
189pub struct WebFetchHttpError {
190    pub output: String,
191    pub meta: FetchMetadata,
192    pub body_raw: String,
193}
194
195#[derive(Debug, Clone, Serialize, Deserialize)]
196pub struct WebFetchError {
197    pub error: ToolError,
198}
199
200#[derive(Debug, Clone, Serialize, Deserialize)]
201#[serde(tag = "kind", rename_all = "snake_case")]
202pub enum WebFetchResult {
203    #[serde(rename = "ok")]
204    Ok(WebFetchOk),
205    #[serde(rename = "redirect_loop")]
206    RedirectLoop(WebFetchRedirectLoop),
207    #[serde(rename = "http_error")]
208    HttpError(WebFetchHttpError),
209    #[serde(rename = "error")]
210    Error(WebFetchError),
211}
212
213impl From<WebFetchError> for WebFetchResult {
214    fn from(e: WebFetchError) -> Self {
215        WebFetchResult::Error(e)
216    }
217}