zeph_sanitizer/
exfiltration.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Exfiltration guards: prevent LLM-generated content from leaking data via
5//! outbound channels (markdown images, tool URL injection, poisoned memory writes).
6//!
7//! The [`ExfiltrationGuard`] is stateless and covers three attack vectors:
8//!
9//! 1. **Markdown image exfiltration** — an adversary plants `![t](https://evil.com/track.gif)`
10//!    in content. When the LLM echoes it, the rendered image loads silently, leaking session data.
11//!    [`ExfiltrationGuard::scan_output`] strips these and replaces them with `[image removed: …]`.
12//!
13//! 2. **URL injection via tool calls** — a flagged URL from untrusted tool output appears in a
14//!    subsequent tool call argument. [`ExfiltrationGuard::validate_tool_call`] cross-references
15//!    URLs against the per-turn flagged URL set. Flag-only approach (does not block execution).
16//!
17//! 3. **Poisoned memory writes** — content flagged with injection patterns is intercepted before
18//!    Qdrant embedding. [`ExfiltrationGuard::should_guard_memory_write`] signals the caller to
19//!    skip the embedding step, preventing poisoned content from polluting semantic search.
20//!
21//! # Phase 5 TODO
22//! - HTML img tag detection (`<img src="https://...">`) — requires HTML parser
//! - Unicode zero-width space bypass (`!\u{200B}[alt](url)`) — requires Unicode-aware matching
24//! - Both are low-priority: the LLM context wrapper already limits what arrives here
25
26use std::collections::HashSet;
27use std::fmt::Write as _;
28use std::sync::LazyLock;
29
30use regex::Regex;
31use zeph_common::ToolName;
32
33pub use zeph_config::ExfiltrationGuardConfig;
34
35// ---------------------------------------------------------------------------
36// Regex patterns
37// ---------------------------------------------------------------------------
38
39/// Matches inline markdown images with external http/https URLs:
40/// `![alt text](https://example.com/track.gif)`
41///
42/// Local paths (`./img.png`) and data URIs (`data:image/...`) are intentionally
43/// excluded — they cannot exfiltrate data to a remote server.
44static MARKDOWN_IMAGE_RE: LazyLock<Regex> = LazyLock::new(|| {
45    Regex::new(r"!\[([^\]]*)\]\((https?://[^)]+)\)").expect("valid MARKDOWN_IMAGE_RE")
46});
47
48/// Matches reference-style markdown image declarations: `[ref]: https://example.com/img`
49/// Used in conjunction with `REFERENCE_LABEL_RE` to detect two-part reference images.
50static REFERENCE_DEF_RE: LazyLock<Regex> = LazyLock::new(|| {
51    Regex::new(r"(?m)^\[([^\]]+)\]:\s*(https?://\S+)").expect("valid REFERENCE_DEF_RE")
52});
53
54/// Matches reference-style image usages: `![alt][ref]`
55static REFERENCE_USAGE_RE: LazyLock<Regex> =
56    LazyLock::new(|| Regex::new(r"!\[([^\]]*)\]\[([^\]]+)\]").expect("valid REFERENCE_USAGE_RE"));
57
58/// Extracts http/https URLs from arbitrary text (used for tool argument scanning).
59static URL_EXTRACT_RE: LazyLock<Regex> =
60    LazyLock::new(|| Regex::new(r#"https?://[^\s"'<>]+"#).expect("valid URL_EXTRACT_RE"));
61
62// ---------------------------------------------------------------------------
63// Event types
64// ---------------------------------------------------------------------------
65
66/// An exfiltration event detected by [`ExfiltrationGuard`].
67///
68/// Events are advisory: they are logged, counted, and returned to the caller for
69/// further action. The guard itself never panics or blocks the agent loop.
70///
71/// # Examples
72///
73/// ```rust
74/// use zeph_sanitizer::exfiltration::{ExfiltrationGuard, ExfiltrationEvent};
75/// use zeph_config::ExfiltrationGuardConfig;
76///
77/// let guard = ExfiltrationGuard::new(ExfiltrationGuardConfig::default());
78/// let (cleaned, events) = guard.scan_output("![t](https://evil.com/pixel.gif)");
79/// assert_eq!(events.len(), 1);
80/// assert!(matches!(&events[0], ExfiltrationEvent::MarkdownImageBlocked { url } if url.contains("evil.com")));
81/// ```
#[derive(Debug, Clone, PartialEq)]
pub enum ExfiltrationEvent {
    /// A markdown image with an external URL was stripped from LLM output.
    ///
    /// `url` is the percent-decoded URL of the removed image.
    MarkdownImageBlocked { url: String },
    /// A tool call argument contained a URL that appeared in untrusted flagged content.
    ///
    /// `url` is the URL exactly as it was found in the argument; `tool_name` identifies
    /// the tool whose arguments were scanned.
    SuspiciousToolUrl { url: String, tool_name: ToolName },
    /// A memory write was intercepted because the content had injection flags.
    ///
    /// `reason` is a human-readable explanation of why the write was guarded.
    MemoryWriteGuarded { reason: String },
}
91
92// ---------------------------------------------------------------------------
93// Guard
94// ---------------------------------------------------------------------------
95
96/// Stateless exfiltration guard covering three outbound leak vectors.
97///
98/// Construct once from [`ExfiltrationGuardConfig`] and store on the agent. Cheap to clone.
99/// All three scanners ([`scan_output`](Self::scan_output),
100/// [`validate_tool_call`](Self::validate_tool_call),
101/// [`should_guard_memory_write`](Self::should_guard_memory_write)) are independently
102/// toggled via the config flags `block_markdown_images`, `validate_tool_urls`, and
103/// `guard_memory_writes`.
104///
105/// # Examples
106///
107/// ```rust
108/// use zeph_sanitizer::exfiltration::ExfiltrationGuard;
109/// use zeph_config::ExfiltrationGuardConfig;
110///
111/// let guard = ExfiltrationGuard::new(ExfiltrationGuardConfig::default());
112///
113/// // Strips external tracking pixels from LLM output.
114/// let (cleaned, events) = guard.scan_output("text ![track](https://evil.com/p.gif) end");
115/// assert!(events.len() == 1);
116/// assert!(!cleaned.contains("![track]"));
117///
118/// // Memory write is guarded when injection flags are present.
119/// let event = guard.should_guard_memory_write(true);
120/// assert!(event.is_some());
121/// ```
#[derive(Debug, Clone)]
pub struct ExfiltrationGuard {
    /// Feature toggles read by the scanners: `block_markdown_images`,
    /// `validate_tool_urls`, and `guard_memory_writes`.
    config: ExfiltrationGuardConfig,
}
126
127impl ExfiltrationGuard {
128    /// Create a new guard from the given configuration.
129    ///
130    /// # Examples
131    ///
132    /// ```rust
133    /// use zeph_sanitizer::exfiltration::ExfiltrationGuard;
134    /// use zeph_config::ExfiltrationGuardConfig;
135    ///
136    /// let guard = ExfiltrationGuard::new(ExfiltrationGuardConfig::default());
137    /// ```
138    #[must_use]
139    pub fn new(config: ExfiltrationGuardConfig) -> Self {
140        Self { config }
141    }
142
143    /// Scan LLM output text and strip external markdown images.
144    ///
145    /// Returns the cleaned text and a list of [`ExfiltrationEvent::MarkdownImageBlocked`]
146    /// for each image that was removed.
147    ///
148    /// When `block_markdown_images` is `false`, returns the input unchanged.
149    ///
150    /// # Scanning coverage
151    /// - Inline images: `![alt](https://evil.com/track.gif)`
152    /// - Reference-style images: `![alt][ref]` + `[ref]: https://evil.com/img`
153    /// - Percent-encoded URLs inside already-captured groups: decoded before `is_external_url()`
154    ///
155    /// # Not covered (Phase 5, tracked in #1195)
156    /// - Percent-encoded scheme bypass: `%68ttps://evil.com` — the regex requires literal
157    ///   `https?://`, so a percent-encoded scheme is never captured. Fix requires pre-decoding
158    ///   the full input text before regex matching.
159    /// - HTML `<img src="...">` tags
160    /// - Unicode zero-width joiner tricks (`!\u{200B}[alt](url)`)
161    /// - Reference definitions inside fenced code blocks (false positive risk)
162    ///
163    /// # Panics
164    ///
165    /// Panics if the compiled regex does not produce expected capture groups (compile-time
166    /// guarantee — the regex patterns are validated via `expect` in `LazyLock` initializers).
167    #[must_use]
168    pub fn scan_output(&self, text: &str) -> (String, Vec<ExfiltrationEvent>) {
169        if !self.config.block_markdown_images {
170            return (text.to_owned(), vec![]);
171        }
172
173        let mut events = Vec::new();
174        let mut result = text.to_owned();
175
176        // --- Pass 1: inline images ---
177        let mut replacement = String::new();
178        let mut last_end = 0usize;
179        for cap in MARKDOWN_IMAGE_RE.captures_iter(text) {
180            let m = cap.get(0).expect("full match");
181            let raw_url = cap.get(2).expect("url group").as_str();
182            let url = percent_decode_url(raw_url);
183
184            if is_external_url(&url) {
185                replacement.push_str(&text[last_end..m.start()]);
186                let _ = write!(replacement, "[image removed: {url}]");
187                last_end = m.end();
188                events.push(ExfiltrationEvent::MarkdownImageBlocked { url });
189            }
190        }
191        if !events.is_empty() || last_end > 0 {
192            replacement.push_str(&text[last_end..]);
193            result = replacement;
194        }
195
196        // --- Pass 2: reference-style images ---
197        // Collect reference definitions from the (already partially cleaned) result.
198        let mut ref_defs: std::collections::HashMap<String, String> =
199            std::collections::HashMap::new();
200        for cap in REFERENCE_DEF_RE.captures_iter(&result) {
201            let label = cap.get(1).expect("label").as_str().to_lowercase();
202            let raw_url = cap.get(2).expect("url").as_str();
203            let url = percent_decode_url(raw_url);
204            if is_external_url(&url) {
205                ref_defs.insert(label, url);
206            }
207        }
208
209        if !ref_defs.is_empty() {
210            // Remove reference usages that point to external defs.
211            let mut cleaned = String::with_capacity(result.len());
212            let mut last_end = 0usize;
213            for cap in REFERENCE_USAGE_RE.captures_iter(&result) {
214                let m = cap.get(0).expect("full match");
215                let label = cap.get(2).expect("label").as_str().to_lowercase();
216                if let Some(url) = ref_defs.get(&label) {
217                    cleaned.push_str(&result[last_end..m.start()]);
218                    let _ = write!(cleaned, "[image removed: {url}]");
219                    last_end = m.end();
220                    events.push(ExfiltrationEvent::MarkdownImageBlocked { url: url.clone() });
221                }
222            }
223            cleaned.push_str(&result[last_end..]);
224            result = cleaned;
225
226            // Remove the reference definition lines for blocked refs.
227            // Use split('\n') (not .lines()) to preserve \r in CRLF line endings —
228            // .lines() strips \r, and reconstruction with push('\n') would silently
229            // convert all CRLF to LF throughout the entire text.
230            let mut def_cleaned = String::with_capacity(result.len());
231            for line in result.split('\n') {
232                let mut keep = true;
233                for cap in REFERENCE_DEF_RE.captures_iter(line) {
234                    let label = cap.get(1).expect("label").as_str().to_lowercase();
235                    if ref_defs.contains_key(&label) {
236                        keep = false;
237                        break;
238                    }
239                }
240                if keep {
241                    def_cleaned.push_str(line);
242                    def_cleaned.push('\n');
243                }
244            }
245            // Preserve trailing newline behaviour of the original.
246            if !text.ends_with('\n') && def_cleaned.ends_with('\n') {
247                def_cleaned.pop();
248            }
249            result = def_cleaned;
250        }
251
252        (result, events)
253    }
254
255    /// Validate tool call arguments against a set of URLs flagged in untrusted content.
256    ///
257    /// Parses `args_json` as a JSON value and extracts all string leaves recursively to
258    /// avoid JSON-encoding bypasses (escaped slashes, unicode escapes, etc.).
259    ///
260    /// Returns one [`ExfiltrationEvent::SuspiciousToolUrl`] per matching URL.
261    /// When `validate_tool_urls` is `false`, always returns an empty vec.
262    ///
263    /// # Flag-only approach
264    /// Matching URLs are logged and counted but tool execution is NOT blocked. Blocking
265    /// would break legitimate workflows where the same URL appears in both a search result
266    /// and a subsequent fetch call. See design decision D1 in the architect handoff.
267    #[must_use]
268    pub fn validate_tool_call(
269        &self,
270        tool_name: &str,
271        args_json: &str,
272        flagged_urls: &HashSet<String>,
273    ) -> Vec<ExfiltrationEvent> {
274        if !self.config.validate_tool_urls || flagged_urls.is_empty() {
275            return vec![];
276        }
277
278        let parsed: serde_json::Value = match serde_json::from_str(args_json) {
279            Ok(v) => v,
280            Err(_) => {
281                // Fall back to raw regex scan if JSON is malformed.
282                return Self::scan_raw_args(tool_name, args_json, flagged_urls);
283            }
284        };
285
286        let mut events = Vec::new();
287        let mut strings = Vec::new();
288        collect_strings(&parsed, &mut strings);
289
290        for s in &strings {
291            for url_match in URL_EXTRACT_RE.find_iter(s) {
292                let url = url_match.as_str();
293                if flagged_urls.contains(url) {
294                    events.push(ExfiltrationEvent::SuspiciousToolUrl {
295                        url: url.to_owned(),
296                        tool_name: tool_name.into(),
297                    });
298                }
299            }
300        }
301
302        events
303    }
304
305    /// Check whether a memory write should skip Qdrant embedding.
306    ///
307    /// Returns `Some(MemoryWriteGuarded)` when `has_injection_flags` is `true` and
308    /// `guard_memory_writes` is enabled. The caller should still save to `SQLite` for
309    /// conversation continuity but omit the Qdrant embedding to prevent poisoned content
310    /// from polluting semantic search results.
311    ///
312    /// See design decision D2 in the architect handoff.
313    #[must_use]
314    pub fn should_guard_memory_write(
315        &self,
316        has_injection_flags: bool,
317    ) -> Option<ExfiltrationEvent> {
318        if !self.config.guard_memory_writes || !has_injection_flags {
319            return None;
320        }
321        Some(ExfiltrationEvent::MemoryWriteGuarded {
322            reason: "content contained injection patterns flagged by ContentSanitizer".to_owned(),
323        })
324    }
325
326    /// Extract URLs from untrusted tool output for use in subsequent `validate_tool_call` checks.
327    ///
328    fn scan_raw_args(
329        tool_name: &str,
330        args: &str,
331        flagged_urls: &HashSet<String>,
332    ) -> Vec<ExfiltrationEvent> {
333        URL_EXTRACT_RE
334            .find_iter(args)
335            .filter(|m| flagged_urls.contains(m.as_str()))
336            .map(|m| ExfiltrationEvent::SuspiciousToolUrl {
337                url: m.as_str().to_owned(),
338                tool_name: tool_name.into(),
339            })
340            .collect()
341    }
342}
343
344/// Extract all `http`/`https` URLs from `content` into a `HashSet` for later URL validation.
345///
346/// Call this after sanitizing untrusted tool output with `ContentSanitizer` when injection
347/// flags are present. Pass the returned set into the agent's `flagged_urls` field. Pass that
348/// set to [`ExfiltrationGuard::validate_tool_call`] on each subsequent tool call. Clear
349/// `flagged_urls` at the start of each `process_response` call (per-turn clearing strategy).
350///
351/// # Examples
352///
353/// ```rust
354/// use zeph_sanitizer::exfiltration::extract_flagged_urls;
355///
356/// let urls = extract_flagged_urls("visit https://evil.com/x and https://other.com/y");
357/// assert!(urls.contains("https://evil.com/x"));
358/// assert!(urls.contains("https://other.com/y"));
359/// assert_eq!(urls.len(), 2);
360/// ```
361#[must_use]
362pub fn extract_flagged_urls(content: &str) -> HashSet<String> {
363    URL_EXTRACT_RE
364        .find_iter(content)
365        .map(|m| m.as_str().to_owned())
366        .collect()
367}
368
369// ---------------------------------------------------------------------------
370// Helpers
371// ---------------------------------------------------------------------------
372
/// Decode percent-encoded URL characters before exfiltration matching.
///
/// Converts `%68ttps://` → `https://` so simple percent-encoding bypasses are caught.
///
/// Decoding works on bytes and the result is then interpreted as UTF-8, so non-ASCII
/// characters survive intact whether they appear literally (`é`) or escaped (`%C3%A9`).
/// A `%` that is not followed by two hexadecimal digits is copied through unchanged.
/// Decoded byte sequences that are not valid UTF-8 are replaced with U+FFFD; the valid
/// remainder of the URL is still decoded.
fn percent_decode_url(raw: &str) -> String {
    /// Numeric value of one ASCII hexadecimal digit, or `None` for any other byte.
    fn hex_value(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    let bytes = raw.as_bytes();
    let mut decoded = Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] == b'%' {
            // `get` keeps a truncated escape at the end of the input (`%`, `%4`) from
            // indexing out of bounds; such input is copied through literally below.
            let hi = bytes.get(i + 1).copied().and_then(hex_value);
            let lo = bytes.get(i + 2).copied().and_then(hex_value);
            if let (Some(hi), Some(lo)) = (hi, lo) {
                decoded.push((hi << 4) | lo);
                i += 3;
                continue;
            }
        }
        decoded.push(bytes[i]);
        i += 1;
    }

    // Escapes may decode to bytes that are not valid UTF-8; degrade to U+FFFD rather than
    // discarding the parts that did decode cleanly.
    String::from_utf8(decoded)
        .unwrap_or_else(|err| String::from_utf8_lossy(err.as_bytes()).into_owned())
}
401
/// Report whether `url` uses the `http` or `https` scheme, i.e. points at a remote host.
///
/// URI schemes are case-insensitive, so `HTTPS://…` counts as external just like
/// `https://…`. Anything else (relative paths, `data:` URIs, other schemes, an empty
/// string) returns `false`.
fn is_external_url(url: &str) -> bool {
    ["http://", "https://"].iter().any(|prefix| {
        // `get` returns `None` when the URL is shorter than the prefix or the cut would
        // fall inside a multi-byte character, so this never panics.
        url.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    })
}
405
406/// Recursively collect all string leaves from a JSON value.
407fn collect_strings<'a>(value: &'a serde_json::Value, out: &mut Vec<&'a str>) {
408    match value {
409        serde_json::Value::String(s) => out.push(s.as_str()),
410        serde_json::Value::Array(arr) => {
411            for v in arr {
412                collect_strings(v, out);
413            }
414        }
415        serde_json::Value::Object(map) => {
416            for v in map.values() {
417                collect_strings(v, out);
418            }
419        }
420        _ => {}
421    }
422}
423
424// ---------------------------------------------------------------------------
425// Tests
426// ---------------------------------------------------------------------------
427
// Unit tests for the three guard entry points (`scan_output`, `validate_tool_call`,
// `should_guard_memory_write`) and the free helpers `percent_decode_url` and
// `extract_flagged_urls`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Guard built from the default configuration.
    fn guard() -> ExfiltrationGuard {
        ExfiltrationGuard::new(ExfiltrationGuardConfig::default())
    }

    /// Guard with all three scanners switched off.
    fn guard_disabled() -> ExfiltrationGuard {
        ExfiltrationGuard::new(ExfiltrationGuardConfig {
            block_markdown_images: false,
            validate_tool_urls: false,
            guard_memory_writes: false,
        })
    }

    // --- scan_output ---

    #[test]
    fn strips_external_inline_image() {
        let (cleaned, events) =
            guard().scan_output("Before ![track](https://evil.com/p.gif) after");
        assert_eq!(
            cleaned,
            "Before [image removed: https://evil.com/p.gif] after"
        );
        assert_eq!(events.len(), 1);
        assert!(
            matches!(&events[0], ExfiltrationEvent::MarkdownImageBlocked { url } if url == "https://evil.com/p.gif")
        );
    }

    #[test]
    fn preserves_local_image() {
        let text = "Look: ![diagram](./diagram.png) — local";
        let (cleaned, events) = guard().scan_output(text);
        assert_eq!(cleaned, text);
        assert!(events.is_empty());
    }

    #[test]
    fn preserves_data_uri() {
        let text = "Inline: ![icon](data:image/png;base64,abc123)";
        let (cleaned, events) = guard().scan_output(text);
        assert_eq!(cleaned, text);
        assert!(events.is_empty());
    }

    #[test]
    fn strips_multiple_external_images() {
        let text = "![a](https://a.com/1.gif) text ![b](https://b.com/2.gif)";
        let (cleaned, events) = guard().scan_output(text);
        // Markdown image syntax must be removed; replacement label may contain URLs.
        assert!(
            !cleaned.contains("![a]("),
            "first image syntax must be removed: {cleaned}"
        );
        assert!(
            !cleaned.contains("![b]("),
            "second image syntax must be removed: {cleaned}"
        );
        assert_eq!(events.len(), 2);
    }

    #[test]
    fn scan_output_noop_when_disabled() {
        let text = "![track](https://evil.com/p.gif)";
        let (cleaned, events) = guard_disabled().scan_output(text);
        assert_eq!(cleaned, text);
        assert!(events.is_empty());
    }

    #[test]
    fn strips_reference_style_image() {
        let text = "Here is the image: ![alt][ref]\n[ref]: https://evil.com/track.gif\nend";
        let (cleaned, events) = guard().scan_output(text);
        // The markdown image syntax and definition line must be removed.
        assert!(
            !cleaned.contains("![alt][ref]"),
            "image usage syntax must be removed: {cleaned}"
        );
        assert!(
            !cleaned.contains("[ref]:"),
            "reference definition must be removed: {cleaned}"
        );
        assert!(
            cleaned.contains("[image removed:"),
            "replacement label must be present: {cleaned}"
        );
        assert!(!events.is_empty(), "must generate event");
    }

    #[test]
    fn preserves_local_reference_image() {
        // Reference pointing to a local path — must not be stripped.
        let text = "![alt][ref]\n[ref]: ./local.png\n";
        let (cleaned, events) = guard().scan_output(text);
        assert_eq!(cleaned, text);
        assert!(events.is_empty());
    }

    // Despite its name, this test pins the current *non*-detection of a percent-encoded
    // scheme (a documented known bypass) and then checks that a plain URL is detected.
    #[test]
    fn decodes_percent_encoded_url_in_inline_image() {
        // %68 = 'h', so %68ttps:// decodes to https://.
        // The MARKDOWN_IMAGE_RE pattern requires a literal `https?://` prefix, so
        // `%68ttps://` is NOT matched by the regex and passes through unchanged.
        // percent_decode_url() is called on the URL *after* the regex captures it —
        // so percent-encoded schemes bypass inline detection.
        //
        // Known bypass — tracked for Phase 5 (#1195): the fix requires pre-decoding the
        // full text before regex matching (or a multi-pass decode+scan approach). The LLM
        // context wrapper already limits what arrives here, reducing practical risk.
        let text = "![t](%68ttps://evil.com/track.gif)";
        let (cleaned, _events) = guard().scan_output(text);
        // The text passes through unchanged because the regex didn't match.
        assert_eq!(
            cleaned, text,
            "percent-encoded scheme not detected by inline regex"
        );

        // A normal https:// URL IS detected.
        let normal = "![t](https://evil.com/track.gif)";
        let (normal_cleaned, normal_events) = guard().scan_output(normal);
        assert!(
            !normal_cleaned.contains("![t](https://"),
            "normal URL must be removed"
        );
        assert_eq!(normal_events.len(), 1);
    }

    #[test]
    fn empty_alt_text_still_blocked() {
        let text = "![](https://evil.com/p.gif)";
        let (cleaned, events) = guard().scan_output(text);
        // The original markdown image syntax must be removed; the replacement label may contain the URL.
        assert!(
            !cleaned.contains("![]("),
            "markdown image syntax must be removed: {cleaned}"
        );
        assert!(
            cleaned.contains("[image removed:"),
            "replacement label must be present: {cleaned}"
        );
        assert_eq!(events.len(), 1);
    }

    // --- validate_tool_call ---

    #[test]
    fn detects_flagged_url_in_json_string() {
        let mut flagged = HashSet::new();
        flagged.insert("https://evil.com/payload".to_owned());
        let args = r#"{"url": "https://evil.com/payload"}"#;
        let events = guard().validate_tool_call("fetch", args, &flagged);
        assert_eq!(events.len(), 1);
        assert!(
            matches!(&events[0], ExfiltrationEvent::SuspiciousToolUrl { url, tool_name }
            if url == "https://evil.com/payload" && tool_name == "fetch")
        );
    }

    #[test]
    fn no_event_when_url_not_flagged() {
        let mut flagged = HashSet::new();
        flagged.insert("https://other.com/benign".to_owned());
        let args = r#"{"url": "https://legitimate.com/page"}"#;
        let events = guard().validate_tool_call("fetch", args, &flagged);
        assert!(events.is_empty());
    }

    #[test]
    fn validate_tool_call_noop_when_disabled() {
        let mut flagged = HashSet::new();
        flagged.insert("https://evil.com/x".to_owned());
        let args = r#"{"url": "https://evil.com/x"}"#;
        let events = guard_disabled().validate_tool_call("fetch", args, &flagged);
        assert!(events.is_empty());
    }

    #[test]
    fn validate_tool_call_noop_with_empty_flagged() {
        let args = r#"{"url": "https://evil.com/x"}"#;
        let events = guard().validate_tool_call("fetch", args, &HashSet::new());
        assert!(events.is_empty());
    }

    #[test]
    fn extracts_urls_from_nested_json() {
        let mut flagged = HashSet::new();
        flagged.insert("https://evil.com/deep".to_owned());
        let args = r#"{"nested": {"inner": ["https://evil.com/deep"]}}"#;
        let events = guard().validate_tool_call("tool", args, &flagged);
        assert_eq!(events.len(), 1);
    }

    #[test]
    fn handles_escaped_slashes_in_json() {
        // JSON-encoded URL with escaped forward slashes should still be detected
        // after serde_json parsing (which unescapes the string value).
        let mut flagged = HashSet::new();
        flagged.insert("https://evil.com/path".to_owned());
        // serde_json will unescape \/ → /
        let args = r#"{"url": "https:\/\/evil.com\/path"}"#;
        let parsed: serde_json::Value = serde_json::from_str(args).unwrap();
        // Confirm serde_json unescapes it.
        assert_eq!(parsed["url"], "https://evil.com/path");
        let events = guard().validate_tool_call("fetch", args, &flagged);
        assert_eq!(events.len(), 1, "JSON-escaped URL must be caught");
    }

    // --- should_guard_memory_write ---

    #[test]
    fn guards_when_injection_flags_set() {
        let event = guard().should_guard_memory_write(true);
        assert!(event.is_some());
        assert!(matches!(
            event.unwrap(),
            ExfiltrationEvent::MemoryWriteGuarded { .. }
        ));
    }

    #[test]
    fn passes_when_no_injection_flags() {
        let event = guard().should_guard_memory_write(false);
        assert!(event.is_none());
    }

    #[test]
    fn guard_memory_write_noop_when_disabled() {
        let event = guard_disabled().should_guard_memory_write(true);
        assert!(event.is_none());
    }

    // --- percent_decode_url ---

    // Checks decoding in one direction only (there is no encoder to round-trip through):
    // plain input is unchanged and ASCII escapes are decoded.
    #[test]
    fn percent_decode_roundtrip() {
        assert_eq!(
            percent_decode_url("https://example.com"),
            "https://example.com"
        );
        assert_eq!(
            percent_decode_url("%68ttps://example.com"),
            "https://example.com"
        );
        assert_eq!(percent_decode_url("hello%20world"), "hello world");
    }

    // --- extract_flagged_urls ---

    #[test]
    fn extracts_urls_from_plain_text() {
        let content = "check https://evil.com/x and https://other.com/y for details";
        let urls = extract_flagged_urls(content);
        assert!(urls.contains("https://evil.com/x"));
        assert!(urls.contains("https://other.com/y"));
    }
}
zeph_sanitizer/exfiltration.rs

zeph_sanitizer/
exfiltration.rs