1use std::net::{IpAddr, SocketAddr};
20use std::sync::Arc;
21use std::time::{Duration, Instant};
22
23use schemars::JsonSchema;
24use serde::Deserialize;
25use url::Url;
26
27use zeph_common::ToolName;
28
29use crate::audit::{AuditEntry, AuditLogger, AuditResult, chrono_now};
30use crate::config::ScrapeConfig;
31use crate::executor::{
32 ClaimSource, ToolCall, ToolError, ToolExecutor, ToolOutput, deserialize_params,
33};
34use crate::net::is_private_ip;
35
// Parameters for the structured `fetch` tool call.
// Plain `//` comments on purpose: `///` doc comments would be embedded as
// `description` fields in the schemars-derived JSON schema and change the
// tool schema sent to callers.
#[derive(Debug, Deserialize, JsonSchema)]
struct FetchParams {
    // HTTPS URL to fetch; validated by `validate_url` before any request.
    url: String,
}
41
// One `scrape` instruction: which page to fetch and what to extract.
// Plain `//` comments on purpose: `///` doc comments would be embedded as
// `description` fields in the schemars-derived JSON schema and change the
// tool schema sent to callers.
#[derive(Debug, Deserialize, JsonSchema)]
struct ScrapeInstruction {
    // HTTPS URL of the page to scrape.
    url: String,
    // CSS selector choosing the elements to extract.
    select: String,
    // Extraction mode: "text", "html", or "attr:<name>". Defaults to "text".
    #[serde(default = "default_extract")]
    extract: String,
    // Maximum number of matches to return; `None` falls back to 10 in
    // `scrape_instruction`.
    limit: Option<usize>,
}
54
/// Serde default for `ScrapeInstruction::extract`: plain-text extraction.
fn default_extract() -> String {
    String::from("text")
}
58
/// How a matched element is converted into an output string.
#[derive(Debug)]
enum ExtractMode {
    /// Concatenated text content of the element.
    Text,
    /// Inner HTML markup of the element.
    Html,
    /// Value of the named attribute.
    Attr(String),
}

impl ExtractMode {
    /// Parses the `extract` parameter.
    ///
    /// Recognizes `"text"`, `"html"`, and `"attr:<name>"`; any other
    /// input (including the empty string) falls back to `Text`.
    fn parse(s: &str) -> Self {
        if s == "text" {
            return Self::Text;
        }
        if s == "html" {
            return Self::Html;
        }
        match s.strip_prefix("attr:") {
            Some(name) => Self::Attr(name.to_owned()),
            None => Self::Text,
        }
    }
}
78
/// Tool executor that fetches web pages and extracts data with CSS
/// selectors, with layered SSRF protections (HTTPS-only URLs,
/// private-host/IP blocking, DNS pinning, manual redirect re-validation)
/// and optional audit logging.
#[derive(Debug)]
pub struct WebScrapeExecutor {
    // Per-request HTTP timeout, from `ScrapeConfig::timeout` (seconds).
    timeout: Duration,
    // Hard cap on response body size, in bytes.
    max_body_bytes: usize,
    // Domain allowlist; empty means "any domain not on the denylist".
    allowed_domains: Vec<String>,
    // Domain denylist; consulted before the allowlist.
    denied_domains: Vec<String>,
    // When set, every fetch/scrape attempt is recorded in the audit log.
    audit_logger: Option<Arc<AuditLogger>>,
}
125
126impl WebScrapeExecutor {
127 #[must_use]
131 pub fn new(config: &ScrapeConfig) -> Self {
132 Self {
133 timeout: Duration::from_secs(config.timeout),
134 max_body_bytes: config.max_body_bytes,
135 allowed_domains: config.allowed_domains.clone(),
136 denied_domains: config.denied_domains.clone(),
137 audit_logger: None,
138 }
139 }
140
141 #[must_use]
143 pub fn with_audit(mut self, logger: Arc<AuditLogger>) -> Self {
144 self.audit_logger = Some(logger);
145 self
146 }
147
148 fn build_client(&self, host: &str, addrs: &[SocketAddr]) -> reqwest::Client {
149 let mut builder = reqwest::Client::builder()
150 .timeout(self.timeout)
151 .redirect(reqwest::redirect::Policy::none());
152 builder = builder.resolve_to_addrs(host, addrs);
153 builder.build().unwrap_or_default()
154 }
155}
156
// Tool-executor surface: tool advertisement, fenced-block execution,
// structured tool-call dispatch, and retry policy.
impl ToolExecutor for WebScrapeExecutor {
    /// Advertises the two tools this executor implements: `web_scrape`
    /// (invoked via a fenced `scrape` code block) and `fetch` (a
    /// structured tool call).
    fn tool_definitions(&self) -> Vec<crate::registry::ToolDef> {
        use crate::registry::{InvocationHint, ToolDef};
        vec![
            ToolDef {
                id: "web_scrape".into(),
                description: "Extract structured data from a web page using CSS selectors.\n\nONLY call this tool when the user has explicitly provided a URL in their message, or when a prior tool call returned a URL to retrieve. NEVER construct, guess, or infer a URL from entity names, brand knowledge, or domain patterns.\n\nParameters: url (string, required) - HTTPS URL; select (string, required) - CSS selector; extract (string, optional) - \"text\", \"html\", or \"attr:<name>\"; limit (integer, optional) - max results\nReturns: extracted text/HTML/attribute values, one per line\nErrors: InvalidParams if URL is not HTTPS or selector is empty; Timeout after configured seconds; connection/DNS failures".into(),
                schema: schemars::schema_for!(ScrapeInstruction),
                invocation: InvocationHint::FencedBlock("scrape"),
            },
            ToolDef {
                id: "fetch".into(),
                description: "Fetch a URL and return the response body as plain text.\n\nONLY call this tool when the user has explicitly provided a URL in their message, or when a prior tool call returned a URL to retrieve. NEVER construct, guess, or infer a URL from entity names, brand knowledge, or domain patterns. If no URL is present in the conversation, do not call this tool.\n\nParameters: url (string, required) - HTTPS URL to fetch\nReturns: response body as UTF-8 text, truncated if exceeding max body size\nErrors: InvalidParams if URL is not HTTPS; Timeout; SSRF-blocked private IPs; connection failures".into(),
                schema: schemars::schema_for!(FetchParams),
                invocation: InvocationHint::ToolCall,
            },
        ]
    }

    /// Runs every fenced `scrape` block found in `response`, in order.
    ///
    /// Returns `Ok(None)` when the response contains no scrape blocks.
    /// Each block is timed and audited individually. The first failing
    /// block aborts the run and its error is returned (after being
    /// audited); a combined output is produced only when all blocks
    /// succeed, so `blocks_executed` equals the number of blocks found.
    async fn execute(&self, response: &str) -> Result<Option<ToolOutput>, ToolError> {
        let blocks = extract_scrape_blocks(response);
        if blocks.is_empty() {
            return Ok(None);
        }

        let mut outputs = Vec::with_capacity(blocks.len());
        #[allow(clippy::cast_possible_truncation)]
        let blocks_executed = blocks.len() as u32;

        for block in &blocks {
            // A block that is not valid JSON aborts the whole run; note
            // this happens before any network activity and is not audited.
            let instruction: ScrapeInstruction = serde_json::from_str(block).map_err(|e| {
                ToolError::Execution(std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    e.to_string(),
                ))
            })?;
            let start = Instant::now();
            let scrape_result = self.scrape_instruction(&instruction).await;
            #[allow(clippy::cast_possible_truncation)]
            let duration_ms = start.elapsed().as_millis() as u64;
            match scrape_result {
                Ok(output) => {
                    self.log_audit(
                        "web_scrape",
                        &instruction.url,
                        AuditResult::Success,
                        duration_ms,
                        None,
                    )
                    .await;
                    outputs.push(output);
                }
                Err(e) => {
                    // Audit the failure before propagating it.
                    let audit_result = tool_error_to_audit_result(&e);
                    self.log_audit(
                        "web_scrape",
                        &instruction.url,
                        audit_result,
                        duration_ms,
                        Some(&e),
                    )
                    .await;
                    return Err(e);
                }
            }
        }

        Ok(Some(ToolOutput {
            tool_name: ToolName::new("web-scrape"),
            summary: outputs.join("\n\n"),
            blocks_executed,
            filter_stats: None,
            diff: None,
            streamed: false,
            terminal_id: None,
            locations: None,
            raw_response: None,
            claim_source: Some(ClaimSource::WebScrape),
        }))
    }

    /// Dispatches a structured tool call to `web_scrape` or `fetch`,
    /// auditing success and failure alike. Any other tool id yields
    /// `Ok(None)` (this executor does not claim the call).
    #[cfg_attr(
        feature = "profiling",
        tracing::instrument(name = "tool.web_scrape", skip_all)
    )]
    async fn execute_tool_call(&self, call: &ToolCall) -> Result<Option<ToolOutput>, ToolError> {
        match call.tool_id.as_str() {
            "web_scrape" => {
                let instruction: ScrapeInstruction = deserialize_params(&call.params)?;
                let start = Instant::now();
                let result = self.scrape_instruction(&instruction).await;
                #[allow(clippy::cast_possible_truncation)]
                let duration_ms = start.elapsed().as_millis() as u64;
                match result {
                    Ok(output) => {
                        self.log_audit(
                            "web_scrape",
                            &instruction.url,
                            AuditResult::Success,
                            duration_ms,
                            None,
                        )
                        .await;
                        Ok(Some(ToolOutput {
                            tool_name: ToolName::new("web-scrape"),
                            summary: output,
                            blocks_executed: 1,
                            filter_stats: None,
                            diff: None,
                            streamed: false,
                            terminal_id: None,
                            locations: None,
                            raw_response: None,
                            claim_source: Some(ClaimSource::WebScrape),
                        }))
                    }
                    Err(e) => {
                        let audit_result = tool_error_to_audit_result(&e);
                        self.log_audit(
                            "web_scrape",
                            &instruction.url,
                            audit_result,
                            duration_ms,
                            Some(&e),
                        )
                        .await;
                        Err(e)
                    }
                }
            }
            "fetch" => {
                let p: FetchParams = deserialize_params(&call.params)?;
                let start = Instant::now();
                let result = self.handle_fetch(&p).await;
                #[allow(clippy::cast_possible_truncation)]
                let duration_ms = start.elapsed().as_millis() as u64;
                match result {
                    Ok(output) => {
                        self.log_audit("fetch", &p.url, AuditResult::Success, duration_ms, None)
                            .await;
                        Ok(Some(ToolOutput {
                            tool_name: ToolName::new("fetch"),
                            summary: output,
                            blocks_executed: 1,
                            filter_stats: None,
                            diff: None,
                            streamed: false,
                            terminal_id: None,
                            locations: None,
                            raw_response: None,
                            claim_source: Some(ClaimSource::WebScrape),
                        }))
                    }
                    Err(e) => {
                        let audit_result = tool_error_to_audit_result(&e);
                        self.log_audit("fetch", &p.url, audit_result, duration_ms, Some(&e))
                            .await;
                        Err(e)
                    }
                }
            }
            _ => Ok(None),
        }
    }

    /// Both tools only issue GET requests (see `fetch_html`), so retrying
    /// them is safe.
    fn is_tool_retryable(&self, tool_id: &str) -> bool {
        matches!(tool_id, "web_scrape" | "fetch")
    }
}
326
327fn tool_error_to_audit_result(e: &ToolError) -> AuditResult {
328 match e {
329 ToolError::Blocked { command } => AuditResult::Blocked {
330 reason: command.clone(),
331 },
332 ToolError::Timeout { .. } => AuditResult::Timeout,
333 _ => AuditResult::Error {
334 message: e.to_string(),
335 },
336 }
337}
338
// Internal pipeline: audit logging, fetch/scrape orchestration, and the
// redirect-aware HTTP fetch.
impl WebScrapeExecutor {
    /// Writes one audit entry for a fetch/scrape attempt. No-op when no
    /// audit logger has been configured via `with_audit`.
    async fn log_audit(
        &self,
        tool: &str,
        command: &str,
        result: AuditResult,
        duration_ms: u64,
        error: Option<&ToolError>,
    ) {
        if let Some(ref logger) = self.audit_logger {
            // Expand the error's category into the three audit taxonomy
            // columns; all `None` on success.
            let (error_category, error_domain, error_phase) =
                error.map_or((None, None, None), |e| {
                    let cat = e.category();
                    (
                        Some(cat.label().to_owned()),
                        Some(cat.domain().label().to_owned()),
                        Some(cat.phase().label().to_owned()),
                    )
                });
            let entry = AuditEntry {
                timestamp: chrono_now(),
                tool: tool.into(),
                command: command.into(),
                result,
                duration_ms,
                error_category,
                error_domain,
                error_phase,
                claim_source: Some(ClaimSource::WebScrape),
                mcp_server_id: None,
                injection_flagged: false,
                embedding_anomalous: false,
                cross_boundary_mcp_to_acp: false,
                adversarial_policy_decision: None,
                exit_code: None,
                truncated: false,
                caller_id: None,
                policy_match: None,
            };
            logger.log(&entry).await;
        }
    }

    /// `fetch` tool pipeline: validate the URL (HTTPS, no private host
    /// literal), apply the domain allow/deny policy, resolve DNS with the
    /// private-IP guard, then fetch the body as text.
    async fn handle_fetch(&self, params: &FetchParams) -> Result<String, ToolError> {
        let parsed = validate_url(&params.url)?;
        check_domain_policy(
            parsed.host_str().unwrap_or(""),
            &self.allowed_domains,
            &self.denied_domains,
        )?;
        let (host, addrs) = resolve_and_validate(&parsed).await?;
        self.fetch_html(&params.url, &host, &addrs).await
    }

    /// `web_scrape` pipeline: same validation/resolution as `handle_fetch`,
    /// then HTML parsing and extraction. Parsing runs on `spawn_blocking`
    /// to keep it off the async runtime's worker threads.
    async fn scrape_instruction(
        &self,
        instruction: &ScrapeInstruction,
    ) -> Result<String, ToolError> {
        let parsed = validate_url(&instruction.url)?;
        check_domain_policy(
            parsed.host_str().unwrap_or(""),
            &self.allowed_domains,
            &self.denied_domains,
        )?;
        let (host, addrs) = resolve_and_validate(&parsed).await?;
        let html = self.fetch_html(&instruction.url, &host, &addrs).await?;
        let selector = instruction.select.clone();
        let extract = ExtractMode::parse(&instruction.extract);
        // Unspecified `limit` defaults to 10 results.
        let limit = instruction.limit.unwrap_or(10);
        tokio::task::spawn_blocking(move || parse_and_extract(&html, &selector, &extract, limit))
            .await
            .map_err(|e| ToolError::Execution(std::io::Error::other(e.to_string())))?
    }

    /// Fetches `url` via GET and returns the body as UTF-8 text.
    ///
    /// The client has automatic redirects disabled; redirects are followed
    /// manually (up to `MAX_REDIRECTS` hops) so every hop's URL is
    /// re-validated and its DNS resolution re-checked against the
    /// private-IP guard before the next request — otherwise a redirect
    /// could bounce the request to an internal address.
    async fn fetch_html(
        &self,
        url: &str,
        host: &str,
        addrs: &[SocketAddr],
    ) -> Result<String, ToolError> {
        const MAX_REDIRECTS: usize = 3;

        let mut current_url = url.to_owned();
        let mut current_host = host.to_owned();
        let mut current_addrs = addrs.to_vec();

        for hop in 0..=MAX_REDIRECTS {
            // Each hop gets a fresh client pinned to that hop's resolved
            // addresses.
            let client = self.build_client(&current_host, &current_addrs);
            let resp = client
                .get(&current_url)
                .send()
                .await
                .map_err(|e| ToolError::Execution(std::io::Error::other(e.to_string())))?;

            let status = resp.status();

            if status.is_redirection() {
                if hop == MAX_REDIRECTS {
                    return Err(ToolError::Execution(std::io::Error::other(
                        "too many redirects",
                    )));
                }

                let location = resp
                    .headers()
                    .get(reqwest::header::LOCATION)
                    .and_then(|v| v.to_str().ok())
                    .ok_or_else(|| {
                        ToolError::Execution(std::io::Error::other("redirect with no Location"))
                    })?;

                // Resolve relative Location headers against the current URL.
                let base = Url::parse(&current_url)
                    .map_err(|e| ToolError::Execution(std::io::Error::other(e.to_string())))?;
                let next_url = base
                    .join(location)
                    .map_err(|e| ToolError::Execution(std::io::Error::other(e.to_string())))?;

                // Re-run the full SSRF validation on the redirect target.
                let validated = validate_url(next_url.as_str())?;
                let (next_host, next_addrs) = resolve_and_validate(&validated).await?;

                current_url = next_url.to_string();
                current_host = next_host;
                current_addrs = next_addrs;
                continue;
            }

            if !status.is_success() {
                return Err(ToolError::Http {
                    status: status.as_u16(),
                    message: status.canonical_reason().unwrap_or("unknown").to_owned(),
                });
            }

            // NOTE(review): the whole body is buffered before this size
            // check, so `max_body_bytes` caps what is returned but not
            // peak memory during download — streaming with a cap would be
            // stricter; confirm whether that matters here.
            let bytes = resp
                .bytes()
                .await
                .map_err(|e| ToolError::Execution(std::io::Error::other(e.to_string())))?;

            if bytes.len() > self.max_body_bytes {
                return Err(ToolError::Execution(std::io::Error::other(format!(
                    "response too large: {} bytes (max: {})",
                    bytes.len(),
                    self.max_body_bytes,
                ))));
            }

            return String::from_utf8(bytes.to_vec())
                .map_err(|e| ToolError::Execution(std::io::Error::other(e.to_string())));
        }

        // Defensive: the `hop == MAX_REDIRECTS` branch above returns first,
        // so this is unreachable in practice; kept to satisfy the loop's
        // fall-through.
        Err(ToolError::Execution(std::io::Error::other(
            "too many redirects",
        )))
    }
}
505
/// Returns the body of every fenced code block tagged `scrape` in `text`.
fn extract_scrape_blocks(text: &str) -> Vec<&str> {
    crate::executor::extract_fenced_blocks(text, "scrape")
}
509
510fn check_domain_policy(
522 host: &str,
523 allowed_domains: &[String],
524 denied_domains: &[String],
525) -> Result<(), ToolError> {
526 if denied_domains.iter().any(|p| domain_matches(p, host)) {
527 return Err(ToolError::Blocked {
528 command: format!("domain blocked by denylist: {host}"),
529 });
530 }
531 if !allowed_domains.is_empty() {
532 let is_ip = host.parse::<std::net::IpAddr>().is_ok()
534 || (host.starts_with('[') && host.ends_with(']'));
535 if is_ip {
536 return Err(ToolError::Blocked {
537 command: format!(
538 "bare IP address not allowed when domain allowlist is active: {host}"
539 ),
540 });
541 }
542 if !allowed_domains.iter().any(|p| domain_matches(p, host)) {
543 return Err(ToolError::Blocked {
544 command: format!("domain not in allowlist: {host}"),
545 });
546 }
547 }
548 Ok(())
549}
550
/// Tests whether `host` matches a policy `pattern`.
///
/// Non-wildcard patterns must equal the host exactly. A `*.` wildcard
/// matches exactly one additional label: `*.example.com` matches
/// `a.example.com`, but neither `example.com` itself nor
/// `a.b.example.com`.
fn domain_matches(pattern: &str, host: &str) -> bool {
    let Some(base) = pattern.strip_prefix("*.") else {
        return pattern == host;
    };
    // The wildcard must cover a single non-empty label followed by a dot.
    host.strip_suffix(base)
        .and_then(|rest| rest.strip_suffix('.'))
        .is_some_and(|label| !label.is_empty() && !label.contains('.'))
}
570
571fn validate_url(raw: &str) -> Result<Url, ToolError> {
572 let parsed = Url::parse(raw).map_err(|_| ToolError::Blocked {
573 command: format!("invalid URL: {raw}"),
574 })?;
575
576 if parsed.scheme() != "https" {
577 return Err(ToolError::Blocked {
578 command: format!("scheme not allowed: {}", parsed.scheme()),
579 });
580 }
581
582 if let Some(host) = parsed.host()
583 && is_private_host(&host)
584 {
585 return Err(ToolError::Blocked {
586 command: format!(
587 "private/local host blocked: {}",
588 parsed.host_str().unwrap_or("")
589 ),
590 });
591 }
592
593 Ok(parsed)
594}
595
596fn is_private_host(host: &url::Host<&str>) -> bool {
597 match host {
598 url::Host::Domain(d) => {
599 #[allow(clippy::case_sensitive_file_extension_comparisons)]
602 {
603 *d == "localhost"
604 || d.ends_with(".localhost")
605 || d.ends_with(".internal")
606 || d.ends_with(".local")
607 }
608 }
609 url::Host::Ipv4(v4) => is_private_ip(IpAddr::V4(*v4)),
610 url::Host::Ipv6(v6) => is_private_ip(IpAddr::V6(*v6)),
611 }
612}
613
614async fn resolve_and_validate(url: &Url) -> Result<(String, Vec<SocketAddr>), ToolError> {
620 let Some(host) = url.host_str() else {
621 return Ok((String::new(), vec![]));
622 };
623 let port = url.port_or_known_default().unwrap_or(443);
624 let addrs: Vec<SocketAddr> = tokio::net::lookup_host(format!("{host}:{port}"))
625 .await
626 .map_err(|e| ToolError::Blocked {
627 command: format!("DNS resolution failed: {e}"),
628 })?
629 .collect();
630 for addr in &addrs {
631 if is_private_ip(addr.ip()) {
632 return Err(ToolError::Blocked {
633 command: format!("SSRF protection: private IP {} for host {host}", addr.ip()),
634 });
635 }
636 }
637 Ok((host.to_owned(), addrs))
638}
639
640fn parse_and_extract(
641 html: &str,
642 selector: &str,
643 extract: &ExtractMode,
644 limit: usize,
645) -> Result<String, ToolError> {
646 let soup = scrape_core::Soup::parse(html);
647
648 let tags = soup.find_all(selector).map_err(|e| {
649 ToolError::Execution(std::io::Error::new(
650 std::io::ErrorKind::InvalidData,
651 format!("invalid selector: {e}"),
652 ))
653 })?;
654
655 let mut results = Vec::new();
656
657 for tag in tags.into_iter().take(limit) {
658 let value = match extract {
659 ExtractMode::Text => tag.text(),
660 ExtractMode::Html => tag.inner_html(),
661 ExtractMode::Attr(name) => tag.get(name).unwrap_or_default().to_owned(),
662 };
663 if !value.trim().is_empty() {
664 results.push(value.trim().to_owned());
665 }
666 }
667
668 if results.is_empty() {
669 Ok(format!("No results for selector: {selector}"))
670 } else {
671 Ok(results.join("\n"))
672 }
673}
674
675#[cfg(test)]
676mod tests {
677 use super::*;
678
679 #[test]
682 fn extract_single_block() {
683 let text =
684 "Here:\n```scrape\n{\"url\":\"https://example.com\",\"select\":\"h1\"}\n```\nDone.";
685 let blocks = extract_scrape_blocks(text);
686 assert_eq!(blocks.len(), 1);
687 assert!(blocks[0].contains("example.com"));
688 }
689
690 #[test]
691 fn extract_multiple_blocks() {
692 let text = "```scrape\n{\"url\":\"https://a.com\",\"select\":\"h1\"}\n```\ntext\n```scrape\n{\"url\":\"https://b.com\",\"select\":\"p\"}\n```";
693 let blocks = extract_scrape_blocks(text);
694 assert_eq!(blocks.len(), 2);
695 }
696
697 #[test]
698 fn no_blocks_returns_empty() {
699 let blocks = extract_scrape_blocks("plain text, no code blocks");
700 assert!(blocks.is_empty());
701 }
702
703 #[test]
704 fn unclosed_block_ignored() {
705 let blocks = extract_scrape_blocks("```scrape\n{\"url\":\"https://x.com\"}");
706 assert!(blocks.is_empty());
707 }
708
709 #[test]
710 fn non_scrape_block_ignored() {
711 let text =
712 "```bash\necho hi\n```\n```scrape\n{\"url\":\"https://x.com\",\"select\":\"h1\"}\n```";
713 let blocks = extract_scrape_blocks(text);
714 assert_eq!(blocks.len(), 1);
715 assert!(blocks[0].contains("x.com"));
716 }
717
718 #[test]
719 fn multiline_json_block() {
720 let text =
721 "```scrape\n{\n \"url\": \"https://example.com\",\n \"select\": \"h1\"\n}\n```";
722 let blocks = extract_scrape_blocks(text);
723 assert_eq!(blocks.len(), 1);
724 let instr: ScrapeInstruction = serde_json::from_str(blocks[0]).unwrap();
725 assert_eq!(instr.url, "https://example.com");
726 }
727
728 #[test]
731 fn parse_valid_instruction() {
732 let json = r#"{"url":"https://example.com","select":"h1","extract":"text","limit":5}"#;
733 let instr: ScrapeInstruction = serde_json::from_str(json).unwrap();
734 assert_eq!(instr.url, "https://example.com");
735 assert_eq!(instr.select, "h1");
736 assert_eq!(instr.extract, "text");
737 assert_eq!(instr.limit, Some(5));
738 }
739
740 #[test]
741 fn parse_minimal_instruction() {
742 let json = r#"{"url":"https://example.com","select":"p"}"#;
743 let instr: ScrapeInstruction = serde_json::from_str(json).unwrap();
744 assert_eq!(instr.extract, "text");
745 assert!(instr.limit.is_none());
746 }
747
748 #[test]
749 fn parse_attr_extract() {
750 let json = r#"{"url":"https://example.com","select":"a","extract":"attr:href"}"#;
751 let instr: ScrapeInstruction = serde_json::from_str(json).unwrap();
752 assert_eq!(instr.extract, "attr:href");
753 }
754
755 #[test]
756 fn parse_invalid_json_errors() {
757 let result = serde_json::from_str::<ScrapeInstruction>("not json");
758 assert!(result.is_err());
759 }
760
761 #[test]
764 fn extract_mode_text() {
765 assert!(matches!(ExtractMode::parse("text"), ExtractMode::Text));
766 }
767
768 #[test]
769 fn extract_mode_html() {
770 assert!(matches!(ExtractMode::parse("html"), ExtractMode::Html));
771 }
772
773 #[test]
774 fn extract_mode_attr() {
775 let mode = ExtractMode::parse("attr:href");
776 assert!(matches!(mode, ExtractMode::Attr(ref s) if s == "href"));
777 }
778
779 #[test]
780 fn extract_mode_unknown_defaults_to_text() {
781 assert!(matches!(ExtractMode::parse("unknown"), ExtractMode::Text));
782 }
783
784 #[test]
787 fn valid_https_url() {
788 assert!(validate_url("https://example.com").is_ok());
789 }
790
791 #[test]
792 fn http_rejected() {
793 let err = validate_url("http://example.com").unwrap_err();
794 assert!(matches!(err, ToolError::Blocked { .. }));
795 }
796
797 #[test]
798 fn ftp_rejected() {
799 let err = validate_url("ftp://files.example.com").unwrap_err();
800 assert!(matches!(err, ToolError::Blocked { .. }));
801 }
802
803 #[test]
804 fn file_rejected() {
805 let err = validate_url("file:///etc/passwd").unwrap_err();
806 assert!(matches!(err, ToolError::Blocked { .. }));
807 }
808
809 #[test]
810 fn invalid_url_rejected() {
811 let err = validate_url("not a url").unwrap_err();
812 assert!(matches!(err, ToolError::Blocked { .. }));
813 }
814
815 #[test]
816 fn localhost_blocked() {
817 let err = validate_url("https://localhost/path").unwrap_err();
818 assert!(matches!(err, ToolError::Blocked { .. }));
819 }
820
821 #[test]
822 fn loopback_ip_blocked() {
823 let err = validate_url("https://127.0.0.1/path").unwrap_err();
824 assert!(matches!(err, ToolError::Blocked { .. }));
825 }
826
827 #[test]
828 fn private_10_blocked() {
829 let err = validate_url("https://10.0.0.1/api").unwrap_err();
830 assert!(matches!(err, ToolError::Blocked { .. }));
831 }
832
833 #[test]
834 fn private_172_blocked() {
835 let err = validate_url("https://172.16.0.1/api").unwrap_err();
836 assert!(matches!(err, ToolError::Blocked { .. }));
837 }
838
839 #[test]
840 fn private_192_blocked() {
841 let err = validate_url("https://192.168.1.1/api").unwrap_err();
842 assert!(matches!(err, ToolError::Blocked { .. }));
843 }
844
845 #[test]
846 fn ipv6_loopback_blocked() {
847 let err = validate_url("https://[::1]/path").unwrap_err();
848 assert!(matches!(err, ToolError::Blocked { .. }));
849 }
850
851 #[test]
852 fn public_ip_allowed() {
853 assert!(validate_url("https://93.184.216.34/page").is_ok());
854 }
855
856 #[test]
859 fn extract_text_from_html() {
860 let html = "<html><body><h1>Hello World</h1><p>Content</p></body></html>";
861 let result = parse_and_extract(html, "h1", &ExtractMode::Text, 10).unwrap();
862 assert_eq!(result, "Hello World");
863 }
864
865 #[test]
866 fn extract_multiple_elements() {
867 let html = "<ul><li>A</li><li>B</li><li>C</li></ul>";
868 let result = parse_and_extract(html, "li", &ExtractMode::Text, 10).unwrap();
869 assert_eq!(result, "A\nB\nC");
870 }
871
872 #[test]
873 fn extract_with_limit() {
874 let html = "<ul><li>A</li><li>B</li><li>C</li></ul>";
875 let result = parse_and_extract(html, "li", &ExtractMode::Text, 2).unwrap();
876 assert_eq!(result, "A\nB");
877 }
878
879 #[test]
880 fn extract_attr_href() {
881 let html = r#"<a href="https://example.com">Link</a>"#;
882 let result =
883 parse_and_extract(html, "a", &ExtractMode::Attr("href".to_owned()), 10).unwrap();
884 assert_eq!(result, "https://example.com");
885 }
886
887 #[test]
888 fn extract_inner_html() {
889 let html = "<div><span>inner</span></div>";
890 let result = parse_and_extract(html, "div", &ExtractMode::Html, 10).unwrap();
891 assert!(result.contains("<span>inner</span>"));
892 }
893
894 #[test]
895 fn no_matches_returns_message() {
896 let html = "<html><body><p>text</p></body></html>";
897 let result = parse_and_extract(html, "h1", &ExtractMode::Text, 10).unwrap();
898 assert!(result.starts_with("No results for selector:"));
899 }
900
901 #[test]
902 fn empty_text_skipped() {
903 let html = "<ul><li> </li><li>A</li></ul>";
904 let result = parse_and_extract(html, "li", &ExtractMode::Text, 10).unwrap();
905 assert_eq!(result, "A");
906 }
907
908 #[test]
909 fn invalid_selector_errors() {
910 let html = "<html><body></body></html>";
911 let result = parse_and_extract(html, "[[[invalid", &ExtractMode::Text, 10);
912 assert!(result.is_err());
913 }
914
915 #[test]
916 fn empty_html_returns_no_results() {
917 let result = parse_and_extract("", "h1", &ExtractMode::Text, 10).unwrap();
918 assert!(result.starts_with("No results for selector:"));
919 }
920
921 #[test]
922 fn nested_selector() {
923 let html = "<div><span>inner</span></div><span>outer</span>";
924 let result = parse_and_extract(html, "div > span", &ExtractMode::Text, 10).unwrap();
925 assert_eq!(result, "inner");
926 }
927
928 #[test]
929 fn attr_missing_returns_empty() {
930 let html = r"<a>No href</a>";
931 let result =
932 parse_and_extract(html, "a", &ExtractMode::Attr("href".to_owned()), 10).unwrap();
933 assert!(result.starts_with("No results for selector:"));
934 }
935
936 #[test]
937 fn extract_html_mode() {
938 let html = "<div><b>bold</b> text</div>";
939 let result = parse_and_extract(html, "div", &ExtractMode::Html, 10).unwrap();
940 assert!(result.contains("<b>bold</b>"));
941 }
942
943 #[test]
944 fn limit_zero_returns_no_results() {
945 let html = "<ul><li>A</li><li>B</li></ul>";
946 let result = parse_and_extract(html, "li", &ExtractMode::Text, 0).unwrap();
947 assert!(result.starts_with("No results for selector:"));
948 }
949
950 #[test]
953 fn url_with_port_allowed() {
954 assert!(validate_url("https://example.com:8443/path").is_ok());
955 }
956
957 #[test]
958 fn link_local_ip_blocked() {
959 let err = validate_url("https://169.254.1.1/path").unwrap_err();
960 assert!(matches!(err, ToolError::Blocked { .. }));
961 }
962
963 #[test]
964 fn url_no_scheme_rejected() {
965 let err = validate_url("example.com/path").unwrap_err();
966 assert!(matches!(err, ToolError::Blocked { .. }));
967 }
968
969 #[test]
970 fn unspecified_ipv4_blocked() {
971 let err = validate_url("https://0.0.0.0/path").unwrap_err();
972 assert!(matches!(err, ToolError::Blocked { .. }));
973 }
974
975 #[test]
976 fn broadcast_ipv4_blocked() {
977 let err = validate_url("https://255.255.255.255/path").unwrap_err();
978 assert!(matches!(err, ToolError::Blocked { .. }));
979 }
980
981 #[test]
982 fn ipv6_link_local_blocked() {
983 let err = validate_url("https://[fe80::1]/path").unwrap_err();
984 assert!(matches!(err, ToolError::Blocked { .. }));
985 }
986
987 #[test]
988 fn ipv6_unique_local_blocked() {
989 let err = validate_url("https://[fd12::1]/path").unwrap_err();
990 assert!(matches!(err, ToolError::Blocked { .. }));
991 }
992
993 #[test]
994 fn ipv4_mapped_ipv6_loopback_blocked() {
995 let err = validate_url("https://[::ffff:127.0.0.1]/path").unwrap_err();
996 assert!(matches!(err, ToolError::Blocked { .. }));
997 }
998
999 #[test]
1000 fn ipv4_mapped_ipv6_private_blocked() {
1001 let err = validate_url("https://[::ffff:10.0.0.1]/path").unwrap_err();
1002 assert!(matches!(err, ToolError::Blocked { .. }));
1003 }
1004
1005 #[tokio::test]
1008 async fn executor_no_blocks_returns_none() {
1009 let config = ScrapeConfig::default();
1010 let executor = WebScrapeExecutor::new(&config);
1011 let result = executor.execute("plain text").await;
1012 assert!(result.unwrap().is_none());
1013 }
1014
1015 #[tokio::test]
1016 async fn executor_invalid_json_errors() {
1017 let config = ScrapeConfig::default();
1018 let executor = WebScrapeExecutor::new(&config);
1019 let response = "```scrape\nnot json\n```";
1020 let result = executor.execute(response).await;
1021 assert!(matches!(result, Err(ToolError::Execution(_))));
1022 }
1023
1024 #[tokio::test]
1025 async fn executor_blocked_url_errors() {
1026 let config = ScrapeConfig::default();
1027 let executor = WebScrapeExecutor::new(&config);
1028 let response = "```scrape\n{\"url\":\"http://example.com\",\"select\":\"h1\"}\n```";
1029 let result = executor.execute(response).await;
1030 assert!(matches!(result, Err(ToolError::Blocked { .. })));
1031 }
1032
1033 #[tokio::test]
1034 async fn executor_private_ip_blocked() {
1035 let config = ScrapeConfig::default();
1036 let executor = WebScrapeExecutor::new(&config);
1037 let response = "```scrape\n{\"url\":\"https://192.168.1.1/api\",\"select\":\"h1\"}\n```";
1038 let result = executor.execute(response).await;
1039 assert!(matches!(result, Err(ToolError::Blocked { .. })));
1040 }
1041
1042 #[tokio::test]
1043 async fn executor_unreachable_host_returns_error() {
1044 let config = ScrapeConfig {
1045 timeout: 1,
1046 max_body_bytes: 1_048_576,
1047 ..Default::default()
1048 };
1049 let executor = WebScrapeExecutor::new(&config);
1050 let response = "```scrape\n{\"url\":\"https://192.0.2.1:1/page\",\"select\":\"h1\"}\n```";
1051 let result = executor.execute(response).await;
1052 assert!(matches!(result, Err(ToolError::Execution(_))));
1053 }
1054
1055 #[tokio::test]
1056 async fn executor_localhost_url_blocked() {
1057 let config = ScrapeConfig::default();
1058 let executor = WebScrapeExecutor::new(&config);
1059 let response = "```scrape\n{\"url\":\"https://localhost:9999/api\",\"select\":\"h1\"}\n```";
1060 let result = executor.execute(response).await;
1061 assert!(matches!(result, Err(ToolError::Blocked { .. })));
1062 }
1063
1064 #[tokio::test]
1065 async fn executor_empty_text_returns_none() {
1066 let config = ScrapeConfig::default();
1067 let executor = WebScrapeExecutor::new(&config);
1068 let result = executor.execute("").await;
1069 assert!(result.unwrap().is_none());
1070 }
1071
1072 #[tokio::test]
1073 async fn executor_multiple_blocks_first_blocked() {
1074 let config = ScrapeConfig::default();
1075 let executor = WebScrapeExecutor::new(&config);
1076 let response = "```scrape\n{\"url\":\"http://evil.com\",\"select\":\"h1\"}\n```\n\
1077 ```scrape\n{\"url\":\"https://ok.com\",\"select\":\"h1\"}\n```";
1078 let result = executor.execute(response).await;
1079 assert!(result.is_err());
1080 }
1081
1082 #[test]
1083 fn validate_url_empty_string() {
1084 let err = validate_url("").unwrap_err();
1085 assert!(matches!(err, ToolError::Blocked { .. }));
1086 }
1087
1088 #[test]
1089 fn validate_url_javascript_scheme_blocked() {
1090 let err = validate_url("javascript:alert(1)").unwrap_err();
1091 assert!(matches!(err, ToolError::Blocked { .. }));
1092 }
1093
1094 #[test]
1095 fn validate_url_data_scheme_blocked() {
1096 let err = validate_url("data:text/html,<h1>hi</h1>").unwrap_err();
1097 assert!(matches!(err, ToolError::Blocked { .. }));
1098 }
1099
1100 #[test]
1101 fn is_private_host_public_domain_is_false() {
1102 let host: url::Host<&str> = url::Host::Domain("example.com");
1103 assert!(!is_private_host(&host));
1104 }
1105
1106 #[test]
1107 fn is_private_host_localhost_is_true() {
1108 let host: url::Host<&str> = url::Host::Domain("localhost");
1109 assert!(is_private_host(&host));
1110 }
1111
1112 #[test]
1113 fn is_private_host_ipv6_unspecified_is_true() {
1114 let host = url::Host::Ipv6(std::net::Ipv6Addr::UNSPECIFIED);
1115 assert!(is_private_host(&host));
1116 }
1117
1118 #[test]
1119 fn is_private_host_public_ipv6_is_false() {
1120 let host = url::Host::Ipv6("2001:db8::1".parse().unwrap());
1121 assert!(!is_private_host(&host));
1122 }
1123
1124 async fn mock_server_executor() -> (WebScrapeExecutor, wiremock::MockServer) {
1135 let server = wiremock::MockServer::start().await;
1136 let executor = WebScrapeExecutor {
1137 timeout: Duration::from_secs(5),
1138 max_body_bytes: 1_048_576,
1139 allowed_domains: vec![],
1140 denied_domains: vec![],
1141 audit_logger: None,
1142 };
1143 (executor, server)
1144 }
1145
1146 fn server_host_and_addr(server: &wiremock::MockServer) -> (String, Vec<std::net::SocketAddr>) {
1148 let uri = server.uri();
1149 let url = Url::parse(&uri).unwrap();
1150 let host = url.host_str().unwrap_or("127.0.0.1").to_owned();
1151 let port = url.port().unwrap_or(80);
1152 let addr: std::net::SocketAddr = format!("{host}:{port}").parse().unwrap();
1153 (host, vec![addr])
1154 }
1155
1156 async fn follow_redirects_raw(
1160 executor: &WebScrapeExecutor,
1161 start_url: &str,
1162 host: &str,
1163 addrs: &[std::net::SocketAddr],
1164 ) -> Result<String, ToolError> {
1165 const MAX_REDIRECTS: usize = 3;
1166 let mut current_url = start_url.to_owned();
1167 let mut current_host = host.to_owned();
1168 let mut current_addrs = addrs.to_vec();
1169
1170 for hop in 0..=MAX_REDIRECTS {
1171 let client = executor.build_client(¤t_host, ¤t_addrs);
1172 let resp = client
1173 .get(¤t_url)
1174 .send()
1175 .await
1176 .map_err(|e| ToolError::Execution(std::io::Error::other(e.to_string())))?;
1177
1178 let status = resp.status();
1179
1180 if status.is_redirection() {
1181 if hop == MAX_REDIRECTS {
1182 return Err(ToolError::Execution(std::io::Error::other(
1183 "too many redirects",
1184 )));
1185 }
1186
1187 let location = resp
1188 .headers()
1189 .get(reqwest::header::LOCATION)
1190 .and_then(|v| v.to_str().ok())
1191 .ok_or_else(|| {
1192 ToolError::Execution(std::io::Error::other("redirect with no Location"))
1193 })?;
1194
1195 let base = Url::parse(¤t_url)
1196 .map_err(|e| ToolError::Execution(std::io::Error::other(e.to_string())))?;
1197 let next_url = base
1198 .join(location)
1199 .map_err(|e| ToolError::Execution(std::io::Error::other(e.to_string())))?;
1200
1201 current_url = next_url.to_string();
1203 let _ = &mut current_host;
1205 let _ = &mut current_addrs;
1206 continue;
1207 }
1208
1209 if !status.is_success() {
1210 return Err(ToolError::Execution(std::io::Error::other(format!(
1211 "HTTP {status}",
1212 ))));
1213 }
1214
1215 let bytes = resp
1216 .bytes()
1217 .await
1218 .map_err(|e| ToolError::Execution(std::io::Error::other(e.to_string())))?;
1219
1220 if bytes.len() > executor.max_body_bytes {
1221 return Err(ToolError::Execution(std::io::Error::other(format!(
1222 "response too large: {} bytes (max: {})",
1223 bytes.len(),
1224 executor.max_body_bytes,
1225 ))));
1226 }
1227
1228 return String::from_utf8(bytes.to_vec())
1229 .map_err(|e| ToolError::Execution(std::io::Error::other(e.to_string())));
1230 }
1231
1232 Err(ToolError::Execution(std::io::Error::other(
1233 "too many redirects",
1234 )))
1235 }
1236
1237 #[tokio::test]
1238 async fn fetch_html_success_returns_body() {
1239 use wiremock::matchers::{method, path};
1240 use wiremock::{Mock, ResponseTemplate};
1241
1242 let (executor, server) = mock_server_executor().await;
1243 Mock::given(method("GET"))
1244 .and(path("/page"))
1245 .respond_with(ResponseTemplate::new(200).set_body_string("<h1>OK</h1>"))
1246 .mount(&server)
1247 .await;
1248
1249 let (host, addrs) = server_host_and_addr(&server);
1250 let url = format!("{}/page", server.uri());
1251 let result = executor.fetch_html(&url, &host, &addrs).await;
1252 assert!(result.is_ok(), "expected Ok, got: {result:?}");
1253 assert_eq!(result.unwrap(), "<h1>OK</h1>");
1254 }
1255
1256 #[tokio::test]
1257 async fn fetch_html_non_2xx_returns_error() {
1258 use wiremock::matchers::{method, path};
1259 use wiremock::{Mock, ResponseTemplate};
1260
1261 let (executor, server) = mock_server_executor().await;
1262 Mock::given(method("GET"))
1263 .and(path("/forbidden"))
1264 .respond_with(ResponseTemplate::new(403))
1265 .mount(&server)
1266 .await;
1267
1268 let (host, addrs) = server_host_and_addr(&server);
1269 let url = format!("{}/forbidden", server.uri());
1270 let result = executor.fetch_html(&url, &host, &addrs).await;
1271 assert!(result.is_err());
1272 let msg = result.unwrap_err().to_string();
1273 assert!(msg.contains("403"), "expected 403 in error: {msg}");
1274 }
1275
1276 #[tokio::test]
1277 async fn fetch_html_404_returns_error() {
1278 use wiremock::matchers::{method, path};
1279 use wiremock::{Mock, ResponseTemplate};
1280
1281 let (executor, server) = mock_server_executor().await;
1282 Mock::given(method("GET"))
1283 .and(path("/missing"))
1284 .respond_with(ResponseTemplate::new(404))
1285 .mount(&server)
1286 .await;
1287
1288 let (host, addrs) = server_host_and_addr(&server);
1289 let url = format!("{}/missing", server.uri());
1290 let result = executor.fetch_html(&url, &host, &addrs).await;
1291 assert!(result.is_err());
1292 let msg = result.unwrap_err().to_string();
1293 assert!(msg.contains("404"), "expected 404 in error: {msg}");
1294 }
1295
1296 #[tokio::test]
1297 async fn fetch_html_redirect_no_location_returns_error() {
1298 use wiremock::matchers::{method, path};
1299 use wiremock::{Mock, ResponseTemplate};
1300
1301 let (executor, server) = mock_server_executor().await;
1302 Mock::given(method("GET"))
1304 .and(path("/redirect-no-loc"))
1305 .respond_with(ResponseTemplate::new(302))
1306 .mount(&server)
1307 .await;
1308
1309 let (host, addrs) = server_host_and_addr(&server);
1310 let url = format!("{}/redirect-no-loc", server.uri());
1311 let result = executor.fetch_html(&url, &host, &addrs).await;
1312 assert!(result.is_err());
1313 let msg = result.unwrap_err().to_string();
1314 assert!(
1315 msg.contains("Location") || msg.contains("location"),
1316 "expected Location-related error: {msg}"
1317 );
1318 }
1319
1320 #[tokio::test]
1321 async fn fetch_html_single_redirect_followed() {
1322 use wiremock::matchers::{method, path};
1323 use wiremock::{Mock, ResponseTemplate};
1324
1325 let (executor, server) = mock_server_executor().await;
1326 let final_url = format!("{}/final", server.uri());
1327
1328 Mock::given(method("GET"))
1329 .and(path("/start"))
1330 .respond_with(ResponseTemplate::new(302).insert_header("location", final_url.as_str()))
1331 .mount(&server)
1332 .await;
1333
1334 Mock::given(method("GET"))
1335 .and(path("/final"))
1336 .respond_with(ResponseTemplate::new(200).set_body_string("<p>final</p>"))
1337 .mount(&server)
1338 .await;
1339
1340 let (host, addrs) = server_host_and_addr(&server);
1341 let url = format!("{}/start", server.uri());
1342 let result = follow_redirects_raw(&executor, &url, &host, &addrs).await;
1343 assert!(result.is_ok(), "single redirect should succeed: {result:?}");
1344 assert_eq!(result.unwrap(), "<p>final</p>");
1345 }
1346
1347 #[tokio::test]
1348 async fn fetch_html_three_redirects_allowed() {
1349 use wiremock::matchers::{method, path};
1350 use wiremock::{Mock, ResponseTemplate};
1351
1352 let (executor, server) = mock_server_executor().await;
1353 let hop2 = format!("{}/hop2", server.uri());
1354 let hop3 = format!("{}/hop3", server.uri());
1355 let final_dest = format!("{}/done", server.uri());
1356
1357 Mock::given(method("GET"))
1358 .and(path("/hop1"))
1359 .respond_with(ResponseTemplate::new(301).insert_header("location", hop2.as_str()))
1360 .mount(&server)
1361 .await;
1362 Mock::given(method("GET"))
1363 .and(path("/hop2"))
1364 .respond_with(ResponseTemplate::new(301).insert_header("location", hop3.as_str()))
1365 .mount(&server)
1366 .await;
1367 Mock::given(method("GET"))
1368 .and(path("/hop3"))
1369 .respond_with(ResponseTemplate::new(301).insert_header("location", final_dest.as_str()))
1370 .mount(&server)
1371 .await;
1372 Mock::given(method("GET"))
1373 .and(path("/done"))
1374 .respond_with(ResponseTemplate::new(200).set_body_string("<p>done</p>"))
1375 .mount(&server)
1376 .await;
1377
1378 let (host, addrs) = server_host_and_addr(&server);
1379 let url = format!("{}/hop1", server.uri());
1380 let result = follow_redirects_raw(&executor, &url, &host, &addrs).await;
1381 assert!(result.is_ok(), "3 redirects should succeed: {result:?}");
1382 assert_eq!(result.unwrap(), "<p>done</p>");
1383 }
1384
1385 #[tokio::test]
1386 async fn fetch_html_four_redirects_rejected() {
1387 use wiremock::matchers::{method, path};
1388 use wiremock::{Mock, ResponseTemplate};
1389
1390 let (executor, server) = mock_server_executor().await;
1391 let hop2 = format!("{}/r2", server.uri());
1392 let hop3 = format!("{}/r3", server.uri());
1393 let hop4 = format!("{}/r4", server.uri());
1394 let hop5 = format!("{}/r5", server.uri());
1395
1396 for (from, to) in [
1397 ("/r1", &hop2),
1398 ("/r2", &hop3),
1399 ("/r3", &hop4),
1400 ("/r4", &hop5),
1401 ] {
1402 Mock::given(method("GET"))
1403 .and(path(from))
1404 .respond_with(ResponseTemplate::new(301).insert_header("location", to.as_str()))
1405 .mount(&server)
1406 .await;
1407 }
1408
1409 let (host, addrs) = server_host_and_addr(&server);
1410 let url = format!("{}/r1", server.uri());
1411 let result = follow_redirects_raw(&executor, &url, &host, &addrs).await;
1412 assert!(result.is_err(), "4 redirects should be rejected");
1413 let msg = result.unwrap_err().to_string();
1414 assert!(
1415 msg.contains("redirect"),
1416 "expected redirect-related error: {msg}"
1417 );
1418 }
1419
1420 #[tokio::test]
1421 async fn fetch_html_body_too_large_returns_error() {
1422 use wiremock::matchers::{method, path};
1423 use wiremock::{Mock, ResponseTemplate};
1424
1425 let small_limit_executor = WebScrapeExecutor {
1426 timeout: Duration::from_secs(5),
1427 max_body_bytes: 10,
1428 allowed_domains: vec![],
1429 denied_domains: vec![],
1430 audit_logger: None,
1431 };
1432 let server = wiremock::MockServer::start().await;
1433 Mock::given(method("GET"))
1434 .and(path("/big"))
1435 .respond_with(
1436 ResponseTemplate::new(200)
1437 .set_body_string("this body is definitely longer than ten bytes"),
1438 )
1439 .mount(&server)
1440 .await;
1441
1442 let (host, addrs) = server_host_and_addr(&server);
1443 let url = format!("{}/big", server.uri());
1444 let result = small_limit_executor.fetch_html(&url, &host, &addrs).await;
1445 assert!(result.is_err());
1446 let msg = result.unwrap_err().to_string();
1447 assert!(msg.contains("too large"), "expected too-large error: {msg}");
1448 }
1449
1450 #[test]
1451 fn extract_scrape_blocks_empty_block_content() {
1452 let text = "```scrape\n\n```";
1453 let blocks = extract_scrape_blocks(text);
1454 assert_eq!(blocks.len(), 1);
1455 assert!(blocks[0].is_empty());
1456 }
1457
1458 #[test]
1459 fn extract_scrape_blocks_whitespace_only() {
1460 let text = "```scrape\n \n```";
1461 let blocks = extract_scrape_blocks(text);
1462 assert_eq!(blocks.len(), 1);
1463 }
1464
1465 #[test]
1466 fn parse_and_extract_multiple_selectors() {
1467 let html = "<div><h1>Title</h1><p>Para</p></div>";
1468 let result = parse_and_extract(html, "h1, p", &ExtractMode::Text, 10).unwrap();
1469 assert!(result.contains("Title"));
1470 assert!(result.contains("Para"));
1471 }
1472
1473 #[test]
1474 fn webscrape_executor_new_with_custom_config() {
1475 let config = ScrapeConfig {
1476 timeout: 60,
1477 max_body_bytes: 512,
1478 ..Default::default()
1479 };
1480 let executor = WebScrapeExecutor::new(&config);
1481 assert_eq!(executor.max_body_bytes, 512);
1482 }
1483
1484 #[test]
1485 fn webscrape_executor_debug() {
1486 let config = ScrapeConfig::default();
1487 let executor = WebScrapeExecutor::new(&config);
1488 let dbg = format!("{executor:?}");
1489 assert!(dbg.contains("WebScrapeExecutor"));
1490 }
1491
1492 #[test]
1493 fn extract_mode_attr_empty_name() {
1494 let mode = ExtractMode::parse("attr:");
1495 assert!(matches!(mode, ExtractMode::Attr(ref s) if s.is_empty()));
1496 }
1497
1498 #[test]
1499 fn default_extract_returns_text() {
1500 assert_eq!(default_extract(), "text");
1501 }
1502
1503 #[test]
1504 fn scrape_instruction_debug() {
1505 let json = r#"{"url":"https://example.com","select":"h1"}"#;
1506 let instr: ScrapeInstruction = serde_json::from_str(json).unwrap();
1507 let dbg = format!("{instr:?}");
1508 assert!(dbg.contains("ScrapeInstruction"));
1509 }
1510
1511 #[test]
1512 fn extract_mode_debug() {
1513 let mode = ExtractMode::Text;
1514 let dbg = format!("{mode:?}");
1515 assert!(dbg.contains("Text"));
1516 }
1517
    /// Documents the intended redirect policy (3 hops).
    ///
    /// NOTE(review): this re-declares a local MAX_REDIRECTS and asserts it
    /// against itself, so it cannot detect drift in fetch_html's private
    /// constant. Consider exposing that constant as pub(crate) so this test
    /// can reference the real value.
    #[test]
    fn max_redirects_constant_is_three() {
        const MAX_REDIRECTS: usize = 3;
        assert_eq!(MAX_REDIRECTS, 3, "fetch_html allows exactly 3 redirects");
    }
1530
1531 #[test]
1534 fn redirect_no_location_error_message() {
1535 let err = std::io::Error::other("redirect with no Location");
1536 assert!(err.to_string().contains("redirect with no Location"));
1537 }
1538
1539 #[test]
1541 fn too_many_redirects_error_message() {
1542 let err = std::io::Error::other("too many redirects");
1543 assert!(err.to_string().contains("too many redirects"));
1544 }
1545
1546 #[test]
1548 fn non_2xx_status_error_format() {
1549 let status = reqwest::StatusCode::FORBIDDEN;
1550 let msg = format!("HTTP {status}");
1551 assert!(msg.contains("403"));
1552 }
1553
1554 #[test]
1556 fn not_found_status_error_format() {
1557 let status = reqwest::StatusCode::NOT_FOUND;
1558 let msg = format!("HTTP {status}");
1559 assert!(msg.contains("404"));
1560 }
1561
1562 #[test]
1564 fn relative_redirect_same_host_path() {
1565 let base = Url::parse("https://example.com/current").unwrap();
1566 let resolved = base.join("/other").unwrap();
1567 assert_eq!(resolved.as_str(), "https://example.com/other");
1568 }
1569
1570 #[test]
1572 fn relative_redirect_relative_path() {
1573 let base = Url::parse("https://example.com/a/b").unwrap();
1574 let resolved = base.join("c").unwrap();
1575 assert_eq!(resolved.as_str(), "https://example.com/a/c");
1576 }
1577
1578 #[test]
1580 fn absolute_redirect_overrides_base() {
1581 let base = Url::parse("https://example.com/page").unwrap();
1582 let resolved = base.join("https://other.com/target").unwrap();
1583 assert_eq!(resolved.as_str(), "https://other.com/target");
1584 }
1585
1586 #[test]
1588 fn redirect_http_downgrade_rejected() {
1589 let location = "http://example.com/page";
1590 let base = Url::parse("https://example.com/start").unwrap();
1591 let next = base.join(location).unwrap();
1592 let err = validate_url(next.as_str()).unwrap_err();
1593 assert!(matches!(err, ToolError::Blocked { .. }));
1594 }
1595
1596 #[test]
1598 fn redirect_location_private_ip_blocked() {
1599 let location = "https://192.168.100.1/admin";
1600 let base = Url::parse("https://example.com/start").unwrap();
1601 let next = base.join(location).unwrap();
1602 let err = validate_url(next.as_str()).unwrap_err();
1603 assert!(matches!(err, ToolError::Blocked { .. }));
1604 let ToolError::Blocked { command: cmd } = err else {
1605 panic!("expected Blocked");
1606 };
1607 assert!(
1608 cmd.contains("private") || cmd.contains("scheme"),
1609 "error message should describe the block reason: {cmd}"
1610 );
1611 }
1612
1613 #[test]
1615 fn redirect_location_internal_domain_blocked() {
1616 let location = "https://metadata.internal/latest/meta-data/";
1617 let base = Url::parse("https://example.com/start").unwrap();
1618 let next = base.join(location).unwrap();
1619 let err = validate_url(next.as_str()).unwrap_err();
1620 assert!(matches!(err, ToolError::Blocked { .. }));
1621 }
1622
1623 #[test]
1625 fn redirect_chain_three_hops_all_public() {
1626 let hops = [
1627 "https://redirect1.example.com/hop1",
1628 "https://redirect2.example.com/hop2",
1629 "https://destination.example.com/final",
1630 ];
1631 for hop in hops {
1632 assert!(validate_url(hop).is_ok(), "expected ok for {hop}");
1633 }
1634 }
1635
1636 #[test]
1641 fn redirect_to_private_ip_rejected_by_validate_url() {
1642 let private_targets = [
1644 "https://127.0.0.1/secret",
1645 "https://10.0.0.1/internal",
1646 "https://192.168.1.1/admin",
1647 "https://172.16.0.1/data",
1648 "https://[::1]/path",
1649 "https://[fe80::1]/path",
1650 "https://localhost/path",
1651 "https://service.internal/api",
1652 ];
1653 for target in private_targets {
1654 let result = validate_url(target);
1655 assert!(result.is_err(), "expected error for {target}");
1656 assert!(
1657 matches!(result.unwrap_err(), ToolError::Blocked { .. }),
1658 "expected Blocked for {target}"
1659 );
1660 }
1661 }
1662
1663 #[test]
1665 fn redirect_relative_url_resolves_correctly() {
1666 let base = Url::parse("https://example.com/page").unwrap();
1667 let relative = "/other";
1668 let resolved = base.join(relative).unwrap();
1669 assert_eq!(resolved.as_str(), "https://example.com/other");
1670 }
1671
1672 #[test]
1674 fn redirect_to_http_rejected() {
1675 let err = validate_url("http://example.com/page").unwrap_err();
1676 assert!(matches!(err, ToolError::Blocked { .. }));
1677 }
1678
1679 #[test]
1680 fn ipv4_mapped_ipv6_link_local_blocked() {
1681 let err = validate_url("https://[::ffff:169.254.0.1]/path").unwrap_err();
1682 assert!(matches!(err, ToolError::Blocked { .. }));
1683 }
1684
1685 #[test]
1686 fn ipv4_mapped_ipv6_public_allowed() {
1687 assert!(validate_url("https://[::ffff:93.184.216.34]/path").is_ok());
1688 }
1689
1690 #[tokio::test]
1693 async fn fetch_http_scheme_blocked() {
1694 let config = ScrapeConfig::default();
1695 let executor = WebScrapeExecutor::new(&config);
1696 let call = crate::executor::ToolCall {
1697 tool_id: ToolName::new("fetch"),
1698 params: {
1699 let mut m = serde_json::Map::new();
1700 m.insert("url".to_owned(), serde_json::json!("http://example.com"));
1701 m
1702 },
1703 caller_id: None,
1704 };
1705 let result = executor.execute_tool_call(&call).await;
1706 assert!(matches!(result, Err(ToolError::Blocked { .. })));
1707 }
1708
1709 #[tokio::test]
1710 async fn fetch_private_ip_blocked() {
1711 let config = ScrapeConfig::default();
1712 let executor = WebScrapeExecutor::new(&config);
1713 let call = crate::executor::ToolCall {
1714 tool_id: ToolName::new("fetch"),
1715 params: {
1716 let mut m = serde_json::Map::new();
1717 m.insert(
1718 "url".to_owned(),
1719 serde_json::json!("https://192.168.1.1/secret"),
1720 );
1721 m
1722 },
1723 caller_id: None,
1724 };
1725 let result = executor.execute_tool_call(&call).await;
1726 assert!(matches!(result, Err(ToolError::Blocked { .. })));
1727 }
1728
1729 #[tokio::test]
1730 async fn fetch_localhost_blocked() {
1731 let config = ScrapeConfig::default();
1732 let executor = WebScrapeExecutor::new(&config);
1733 let call = crate::executor::ToolCall {
1734 tool_id: ToolName::new("fetch"),
1735 params: {
1736 let mut m = serde_json::Map::new();
1737 m.insert(
1738 "url".to_owned(),
1739 serde_json::json!("https://localhost/page"),
1740 );
1741 m
1742 },
1743 caller_id: None,
1744 };
1745 let result = executor.execute_tool_call(&call).await;
1746 assert!(matches!(result, Err(ToolError::Blocked { .. })));
1747 }
1748
1749 #[tokio::test]
1750 async fn fetch_unknown_tool_returns_none() {
1751 let config = ScrapeConfig::default();
1752 let executor = WebScrapeExecutor::new(&config);
1753 let call = crate::executor::ToolCall {
1754 tool_id: ToolName::new("unknown_tool"),
1755 params: serde_json::Map::new(),
1756 caller_id: None,
1757 };
1758 let result = executor.execute_tool_call(&call).await;
1759 assert!(result.unwrap().is_none());
1760 }
1761
1762 #[tokio::test]
1763 async fn fetch_returns_body_via_mock() {
1764 use wiremock::matchers::{method, path};
1765 use wiremock::{Mock, ResponseTemplate};
1766
1767 let (executor, server) = mock_server_executor().await;
1768 Mock::given(method("GET"))
1769 .and(path("/content"))
1770 .respond_with(ResponseTemplate::new(200).set_body_string("plain text content"))
1771 .mount(&server)
1772 .await;
1773
1774 let (host, addrs) = server_host_and_addr(&server);
1775 let url = format!("{}/content", server.uri());
1776 let result = executor.fetch_html(&url, &host, &addrs).await;
1777 assert!(result.is_ok());
1778 assert_eq!(result.unwrap(), "plain text content");
1779 }
1780
1781 #[test]
1782 fn tool_definitions_returns_web_scrape_and_fetch() {
1783 let config = ScrapeConfig::default();
1784 let executor = WebScrapeExecutor::new(&config);
1785 let defs = executor.tool_definitions();
1786 assert_eq!(defs.len(), 2);
1787 assert_eq!(defs[0].id, "web_scrape");
1788 assert_eq!(
1789 defs[0].invocation,
1790 crate::registry::InvocationHint::FencedBlock("scrape")
1791 );
1792 assert_eq!(defs[1].id, "fetch");
1793 assert_eq!(
1794 defs[1].invocation,
1795 crate::registry::InvocationHint::ToolCall
1796 );
1797 }
1798
1799 #[test]
1800 fn tool_definitions_schema_has_all_params() {
1801 let config = ScrapeConfig::default();
1802 let executor = WebScrapeExecutor::new(&config);
1803 let defs = executor.tool_definitions();
1804 let obj = defs[0].schema.as_object().unwrap();
1805 let props = obj["properties"].as_object().unwrap();
1806 assert!(props.contains_key("url"));
1807 assert!(props.contains_key("select"));
1808 assert!(props.contains_key("extract"));
1809 assert!(props.contains_key("limit"));
1810 let req = obj["required"].as_array().unwrap();
1811 assert!(req.iter().any(|v| v.as_str() == Some("url")));
1812 assert!(req.iter().any(|v| v.as_str() == Some("select")));
1813 assert!(!req.iter().any(|v| v.as_str() == Some("extract")));
1814 }
1815
1816 #[test]
1819 fn subdomain_localhost_blocked() {
1820 let host: url::Host<&str> = url::Host::Domain("foo.localhost");
1821 assert!(is_private_host(&host));
1822 }
1823
1824 #[test]
1825 fn internal_tld_blocked() {
1826 let host: url::Host<&str> = url::Host::Domain("service.internal");
1827 assert!(is_private_host(&host));
1828 }
1829
1830 #[test]
1831 fn local_tld_blocked() {
1832 let host: url::Host<&str> = url::Host::Domain("printer.local");
1833 assert!(is_private_host(&host));
1834 }
1835
1836 #[test]
1837 fn public_domain_not_blocked() {
1838 let host: url::Host<&str> = url::Host::Domain("example.com");
1839 assert!(!is_private_host(&host));
1840 }
1841
1842 #[tokio::test]
1845 async fn resolve_loopback_rejected() {
1846 let url = url::Url::parse("https://127.0.0.1/path").unwrap();
1848 let result = resolve_and_validate(&url).await;
1850 assert!(
1851 result.is_err(),
1852 "loopback IP must be rejected by resolve_and_validate"
1853 );
1854 let err = result.unwrap_err();
1855 assert!(matches!(err, crate::executor::ToolError::Blocked { .. }));
1856 }
1857
1858 #[tokio::test]
1859 async fn resolve_private_10_rejected() {
1860 let url = url::Url::parse("https://10.0.0.1/path").unwrap();
1861 let result = resolve_and_validate(&url).await;
1862 assert!(result.is_err());
1863 assert!(matches!(
1864 result.unwrap_err(),
1865 crate::executor::ToolError::Blocked { .. }
1866 ));
1867 }
1868
1869 #[tokio::test]
1870 async fn resolve_private_192_rejected() {
1871 let url = url::Url::parse("https://192.168.1.1/path").unwrap();
1872 let result = resolve_and_validate(&url).await;
1873 assert!(result.is_err());
1874 assert!(matches!(
1875 result.unwrap_err(),
1876 crate::executor::ToolError::Blocked { .. }
1877 ));
1878 }
1879
1880 #[tokio::test]
1881 async fn resolve_ipv6_loopback_rejected() {
1882 let url = url::Url::parse("https://[::1]/path").unwrap();
1883 let result = resolve_and_validate(&url).await;
1884 assert!(result.is_err());
1885 assert!(matches!(
1886 result.unwrap_err(),
1887 crate::executor::ToolError::Blocked { .. }
1888 ));
1889 }
1890
1891 #[tokio::test]
1892 async fn resolve_no_host_returns_ok() {
1893 let url = url::Url::parse("https://example.com/path").unwrap();
1895 let url_no_host = url::Url::parse("data:text/plain,hello").unwrap();
1897 let result = resolve_and_validate(&url_no_host).await;
1899 assert!(result.is_ok());
1900 let (host, addrs) = result.unwrap();
1901 assert!(host.is_empty());
1902 assert!(addrs.is_empty());
1903 drop(url);
1904 drop(url_no_host);
1905 }
1906
1907 async fn make_file_audit_logger(
1911 dir: &tempfile::TempDir,
1912 ) -> (
1913 std::sync::Arc<crate::audit::AuditLogger>,
1914 std::path::PathBuf,
1915 ) {
1916 use crate::audit::AuditLogger;
1917 use crate::config::AuditConfig;
1918 let path = dir.path().join("audit.log");
1919 let config = AuditConfig {
1920 enabled: true,
1921 destination: path.display().to_string(),
1922 ..Default::default()
1923 };
1924 let logger = std::sync::Arc::new(AuditLogger::from_config(&config, false).await.unwrap());
1925 (logger, path)
1926 }
1927
1928 #[tokio::test]
1929 async fn with_audit_sets_logger() {
1930 let config = ScrapeConfig::default();
1931 let executor = WebScrapeExecutor::new(&config);
1932 assert!(executor.audit_logger.is_none());
1933
1934 let dir = tempfile::tempdir().unwrap();
1935 let (logger, _path) = make_file_audit_logger(&dir).await;
1936 let executor = executor.with_audit(logger);
1937 assert!(executor.audit_logger.is_some());
1938 }
1939
1940 #[test]
1941 fn tool_error_to_audit_result_blocked_maps_correctly() {
1942 let err = ToolError::Blocked {
1943 command: "scheme not allowed: http".into(),
1944 };
1945 let result = tool_error_to_audit_result(&err);
1946 assert!(
1947 matches!(result, AuditResult::Blocked { reason } if reason == "scheme not allowed: http")
1948 );
1949 }
1950
1951 #[test]
1952 fn tool_error_to_audit_result_timeout_maps_correctly() {
1953 let err = ToolError::Timeout { timeout_secs: 15 };
1954 let result = tool_error_to_audit_result(&err);
1955 assert!(matches!(result, AuditResult::Timeout));
1956 }
1957
1958 #[test]
1959 fn tool_error_to_audit_result_execution_error_maps_correctly() {
1960 let err = ToolError::Execution(std::io::Error::other("connection refused"));
1961 let result = tool_error_to_audit_result(&err);
1962 assert!(
1963 matches!(result, AuditResult::Error { message } if message.contains("connection refused"))
1964 );
1965 }
1966
1967 #[tokio::test]
1968 async fn fetch_audit_blocked_url_logged() {
1969 let dir = tempfile::tempdir().unwrap();
1970 let (logger, log_path) = make_file_audit_logger(&dir).await;
1971
1972 let config = ScrapeConfig::default();
1973 let executor = WebScrapeExecutor::new(&config).with_audit(logger);
1974
1975 let call = crate::executor::ToolCall {
1976 tool_id: ToolName::new("fetch"),
1977 params: {
1978 let mut m = serde_json::Map::new();
1979 m.insert("url".to_owned(), serde_json::json!("http://example.com"));
1980 m
1981 },
1982 caller_id: None,
1983 };
1984 let result = executor.execute_tool_call(&call).await;
1985 assert!(matches!(result, Err(ToolError::Blocked { .. })));
1986
1987 let content = tokio::fs::read_to_string(&log_path).await.unwrap();
1988 assert!(
1989 content.contains("\"tool\":\"fetch\""),
1990 "expected tool=fetch in audit: {content}"
1991 );
1992 assert!(
1993 content.contains("\"type\":\"blocked\""),
1994 "expected type=blocked in audit: {content}"
1995 );
1996 assert!(
1997 content.contains("http://example.com"),
1998 "expected URL in audit command field: {content}"
1999 );
2000 }
2001
2002 #[tokio::test]
2003 async fn log_audit_success_writes_to_file() {
2004 let dir = tempfile::tempdir().unwrap();
2005 let (logger, log_path) = make_file_audit_logger(&dir).await;
2006
2007 let config = ScrapeConfig::default();
2008 let executor = WebScrapeExecutor::new(&config).with_audit(logger);
2009
2010 executor
2011 .log_audit(
2012 "fetch",
2013 "https://example.com/page",
2014 AuditResult::Success,
2015 42,
2016 None,
2017 )
2018 .await;
2019
2020 let content = tokio::fs::read_to_string(&log_path).await.unwrap();
2021 assert!(
2022 content.contains("\"tool\":\"fetch\""),
2023 "expected tool=fetch in audit: {content}"
2024 );
2025 assert!(
2026 content.contains("\"type\":\"success\""),
2027 "expected type=success in audit: {content}"
2028 );
2029 assert!(
2030 content.contains("\"command\":\"https://example.com/page\""),
2031 "expected command URL in audit: {content}"
2032 );
2033 assert!(
2034 content.contains("\"duration_ms\":42"),
2035 "expected duration_ms in audit: {content}"
2036 );
2037 }
2038
2039 #[tokio::test]
2040 async fn log_audit_blocked_writes_to_file() {
2041 let dir = tempfile::tempdir().unwrap();
2042 let (logger, log_path) = make_file_audit_logger(&dir).await;
2043
2044 let config = ScrapeConfig::default();
2045 let executor = WebScrapeExecutor::new(&config).with_audit(logger);
2046
2047 executor
2048 .log_audit(
2049 "web_scrape",
2050 "http://evil.com/page",
2051 AuditResult::Blocked {
2052 reason: "scheme not allowed: http".into(),
2053 },
2054 0,
2055 None,
2056 )
2057 .await;
2058
2059 let content = tokio::fs::read_to_string(&log_path).await.unwrap();
2060 assert!(
2061 content.contains("\"tool\":\"web_scrape\""),
2062 "expected tool=web_scrape in audit: {content}"
2063 );
2064 assert!(
2065 content.contains("\"type\":\"blocked\""),
2066 "expected type=blocked in audit: {content}"
2067 );
2068 assert!(
2069 content.contains("scheme not allowed"),
2070 "expected block reason in audit: {content}"
2071 );
2072 }
2073
2074 #[tokio::test]
2075 async fn web_scrape_audit_blocked_url_logged() {
2076 let dir = tempfile::tempdir().unwrap();
2077 let (logger, log_path) = make_file_audit_logger(&dir).await;
2078
2079 let config = ScrapeConfig::default();
2080 let executor = WebScrapeExecutor::new(&config).with_audit(logger);
2081
2082 let call = crate::executor::ToolCall {
2083 tool_id: ToolName::new("web_scrape"),
2084 params: {
2085 let mut m = serde_json::Map::new();
2086 m.insert("url".to_owned(), serde_json::json!("http://example.com"));
2087 m.insert("select".to_owned(), serde_json::json!("h1"));
2088 m
2089 },
2090 caller_id: None,
2091 };
2092 let result = executor.execute_tool_call(&call).await;
2093 assert!(matches!(result, Err(ToolError::Blocked { .. })));
2094
2095 let content = tokio::fs::read_to_string(&log_path).await.unwrap();
2096 assert!(
2097 content.contains("\"tool\":\"web_scrape\""),
2098 "expected tool=web_scrape in audit: {content}"
2099 );
2100 assert!(
2101 content.contains("\"type\":\"blocked\""),
2102 "expected type=blocked in audit: {content}"
2103 );
2104 }
2105
2106 #[tokio::test]
2107 async fn no_audit_logger_does_not_panic_on_blocked_fetch() {
2108 let config = ScrapeConfig::default();
2109 let executor = WebScrapeExecutor::new(&config);
2110 assert!(executor.audit_logger.is_none());
2111
2112 let call = crate::executor::ToolCall {
2113 tool_id: ToolName::new("fetch"),
2114 params: {
2115 let mut m = serde_json::Map::new();
2116 m.insert("url".to_owned(), serde_json::json!("http://example.com"));
2117 m
2118 },
2119 caller_id: None,
2120 };
2121 let result = executor.execute_tool_call(&call).await;
2123 assert!(matches!(result, Err(ToolError::Blocked { .. })));
2124 }
2125
2126 #[tokio::test]
2128 async fn fetch_execute_tool_call_end_to_end() {
2129 use wiremock::matchers::{method, path};
2130 use wiremock::{Mock, ResponseTemplate};
2131
2132 let (executor, server) = mock_server_executor().await;
2133 Mock::given(method("GET"))
2134 .and(path("/e2e"))
2135 .respond_with(ResponseTemplate::new(200).set_body_string("<h1>end-to-end</h1>"))
2136 .mount(&server)
2137 .await;
2138
2139 let (host, addrs) = server_host_and_addr(&server);
2140 let result = executor
2142 .fetch_html(&format!("{}/e2e", server.uri()), &host, &addrs)
2143 .await;
2144 assert!(result.is_ok());
2145 assert!(result.unwrap().contains("end-to-end"));
2146 }
2147
2148 #[test]
2151 fn domain_matches_exact() {
2152 assert!(domain_matches("example.com", "example.com"));
2153 assert!(!domain_matches("example.com", "other.com"));
2154 assert!(!domain_matches("example.com", "sub.example.com"));
2155 }
2156
2157 #[test]
2158 fn domain_matches_wildcard_single_subdomain() {
2159 assert!(domain_matches("*.example.com", "sub.example.com"));
2160 assert!(!domain_matches("*.example.com", "example.com"));
2161 assert!(!domain_matches("*.example.com", "sub.sub.example.com"));
2162 }
2163
2164 #[test]
2165 fn domain_matches_wildcard_does_not_match_empty_label() {
2166 assert!(!domain_matches("*.example.com", ".example.com"));
2168 }
2169
2170 #[test]
2171 fn domain_matches_multi_wildcard_treated_as_exact() {
2172 assert!(!domain_matches("*.*.example.com", "a.b.example.com"));
2174 }
2175
2176 #[test]
2179 fn check_domain_policy_empty_lists_allow_all() {
2180 assert!(check_domain_policy("example.com", &[], &[]).is_ok());
2181 assert!(check_domain_policy("evil.com", &[], &[]).is_ok());
2182 }
2183
2184 #[test]
2185 fn check_domain_policy_denylist_blocks() {
2186 let denied = vec!["evil.com".to_string()];
2187 let err = check_domain_policy("evil.com", &[], &denied).unwrap_err();
2188 assert!(matches!(err, ToolError::Blocked { .. }));
2189 }
2190
2191 #[test]
2192 fn check_domain_policy_denylist_does_not_block_other_domains() {
2193 let denied = vec!["evil.com".to_string()];
2194 assert!(check_domain_policy("good.com", &[], &denied).is_ok());
2195 }
2196
2197 #[test]
2198 fn check_domain_policy_allowlist_permits_matching() {
2199 let allowed = vec!["docs.rs".to_string(), "*.rust-lang.org".to_string()];
2200 assert!(check_domain_policy("docs.rs", &allowed, &[]).is_ok());
2201 assert!(check_domain_policy("blog.rust-lang.org", &allowed, &[]).is_ok());
2202 }
2203
2204 #[test]
2205 fn check_domain_policy_allowlist_blocks_unknown() {
2206 let allowed = vec!["docs.rs".to_string()];
2207 let err = check_domain_policy("other.com", &allowed, &[]).unwrap_err();
2208 assert!(matches!(err, ToolError::Blocked { .. }));
2209 }
2210
2211 #[test]
2212 fn check_domain_policy_deny_overrides_allow() {
2213 let allowed = vec!["example.com".to_string()];
2214 let denied = vec!["example.com".to_string()];
2215 let err = check_domain_policy("example.com", &allowed, &denied).unwrap_err();
2216 assert!(matches!(err, ToolError::Blocked { .. }));
2217 }
2218
2219 #[test]
2220 fn check_domain_policy_wildcard_in_denylist() {
2221 let denied = vec!["*.evil.com".to_string()];
2222 let err = check_domain_policy("sub.evil.com", &[], &denied).unwrap_err();
2223 assert!(matches!(err, ToolError::Blocked { .. }));
2224 assert!(check_domain_policy("evil.com", &[], &denied).is_ok());
2226 }
2227}