Skip to main content

agent_sdk/observability/
payload.rs

//! `GenAI` payload conversion from SDK types to semconv JSON.
//!
//! Converts SDK [`ChatRequest`] / [`ChatResponse`] types into the JSON
//! schemas defined by the `GenAI` semantic conventions for
//! `gen_ai.system_instructions`, `gen_ai.input.messages`, and
//! `gen_ai.output.messages`.
//!
//! # Privacy
//!
//! This layer sits directly in front of observability egress —
//! whatever shape we emit here flows into `OTel` spans, `ObservabilityStore`
//! implementations, and ultimately into third-party collectors
//! (Langfuse, Datadog, Honeycomb, …). [`PayloadRedactor`] wraps a
//! pluggable [`PiiDetector`] and masks PII in every text and JSON
//! string leaf before it leaves this module, so the two conversion
//! paths (directly serialized onto spans, or handed to the store)
//! share one audited redaction pass.
//!
//! Callers that do not need redaction — including existing call sites
//! that have not adopted the detector yet — should use
//! [`PayloadRedactor::noop`]. Financial and other PII-sensitive
//! integrations should construct a redactor with
//! [`agent_sdk_foundation::privacy::BaselineDetector`] or a custom
//! detector. Routing every conversion through the [`PayloadRedactor`]
//! type keeps the unredacted-egress surface to a single audited path.
26
27use crate::llm::{ChatRequest, ChatResponse, Content, ContentBlock, Message, Role};
28use agent_sdk_foundation::privacy::{NoopDetector, PiiDetector, mask_spans};
29use serde_json::{Value, json};
30use std::sync::Arc;
31
32use super::attrs::finish_reason_str;
33
34/// Redacts PII from payloads before they flow to observability sinks.
35///
36/// Wraps a [`PiiDetector`] and applies it to every text and JSON
37/// string leaf emitted by [`convert_system_instructions`](Self::convert_system_instructions),
38/// [`convert_input_messages`](Self::convert_input_messages), and
39/// [`convert_output_messages`](Self::convert_output_messages).
40///
41/// Construct with [`PayloadRedactor::new`] wrapping any
42/// `Arc<dyn PiiDetector>`, or with [`PayloadRedactor::noop`] for the
43/// pass-through case. The redactor is `Clone` and `Send + Sync`, so
44/// one instance can be shared across the agent loop for the lifetime
45/// of a run.
46#[derive(Clone)]
47pub struct PayloadRedactor {
48    detector: Arc<dyn PiiDetector>,
49    is_noop: bool,
50}
51
52impl PayloadRedactor {
53    /// Wrap an existing detector.
54    #[must_use]
55    pub fn new(detector: Arc<dyn PiiDetector>) -> Self {
56        Self {
57            detector,
58            is_noop: false,
59        }
60    }
61
62    /// Redactor that performs no masking — produces byte-identical
63    /// JSON output to the raw conversion path.
64    #[must_use]
65    pub fn noop() -> Self {
66        Self {
67            detector: Arc::new(NoopDetector),
68            is_noop: true,
69        }
70    }
71
72    /// Whether this redactor performs no masking.
73    ///
74    /// `true` for [`PayloadRedactor::noop`] / [`Default`]. The
75    /// payload-capture gate uses this to refuse `Inline` payloads from a
76    /// store that attested PII redaction but left the default noop redactor
77    /// in place — the attestation alone is not sufficient evidence that PII
78    /// is actually being masked.
79    #[must_use]
80    pub const fn is_noop(&self) -> bool {
81        self.is_noop
82    }
83
84    /// Convert system instructions, masking PII in the system prompt.
85    ///
86    /// Returns `None` if the system prompt is empty.
87    #[must_use]
88    pub fn convert_system_instructions(&self, request: &ChatRequest) -> Option<Value> {
89        if request.system.is_empty() {
90            return None;
91        }
92        Some(json!([{"text": self.mask_str(&request.system)}]))
93    }
94
95    /// Convert input messages into semconv JSON, masking PII in every
96    /// text and tool-argument leaf.
97    #[must_use]
98    pub fn convert_input_messages(&self, request: &ChatRequest) -> Value {
99        let messages: Vec<Value> = request
100            .messages
101            .iter()
102            .map(|m| self.convert_message(m))
103            .collect();
104        Value::Array(messages)
105    }
106
107    /// Convert a [`ChatResponse`] into semconv output-messages JSON,
108    /// masking PII in every assistant text, thinking text, and
109    /// tool-argument leaf.
110    #[must_use]
111    pub fn convert_output_messages(&self, response: &ChatResponse) -> Value {
112        let parts: Vec<Value> = response
113            .content
114            .iter()
115            .filter_map(|b| self.convert_block(b))
116            .collect();
117        let mut message = json!({
118            "role": "assistant",
119            "content": Value::Array(parts),
120        });
121        if let Some(reason) = response.stop_reason {
122            message["finish_reason"] = json!(finish_reason_str(reason));
123        }
124        json!([message])
125    }
126
127    fn convert_message(&self, message: &Message) -> Value {
128        let role = match message.role {
129            Role::User => determine_user_message_role(message),
130            Role::Assistant => "assistant",
131        };
132        let content = self.convert_content(&message.content);
133        json!({
134            "role": role,
135            "content": content,
136        })
137    }
138
139    fn convert_content(&self, content: &Content) -> Value {
140        match content {
141            Content::Text(text) => json!([{"text": self.mask_str(text)}]),
142            Content::Blocks(blocks) => {
143                let parts: Vec<Value> = blocks
144                    .iter()
145                    .filter_map(|b| self.convert_block(b))
146                    .collect();
147                Value::Array(parts)
148            }
149        }
150    }
151
152    fn convert_block(&self, block: &ContentBlock) -> Option<Value> {
153        match block {
154            ContentBlock::Text { text } => Some(json!({"text": self.mask_str(text)})),
155            ContentBlock::Thinking { thinking, .. } => Some(json!({
156                "type": "reasoning",
157                "text": self.mask_str(thinking),
158            })),
159            ContentBlock::ToolUse {
160                id, name, input, ..
161            } => {
162                let masked_input = self.mask_json(input);
163                Some(json!({
164                    "type": "tool_call",
165                    "id": id,
166                    "name": name,
167                    "arguments": masked_input.to_string(),
168                }))
169            }
170            ContentBlock::ToolResult {
171                tool_use_id,
172                content,
173                is_error,
174            } => {
175                let mut part = json!({
176                    "type": "tool_call_response",
177                    "id": tool_use_id,
178                    "output": self.mask_str(content),
179                });
180                if *is_error == Some(true) {
181                    part["is_error"] = json!(true);
182                }
183                Some(part)
184            }
185            ContentBlock::Image { source } => Some(json!({
186                "type": "blob",
187                "mime_type": source.media_type,
188                "modality": "image",
189                "size": source.data.len(),
190            })),
191            ContentBlock::Document { source } => {
192                let mut part = json!({
193                    "type": "blob",
194                    "mime_type": source.media_type,
195                    "size": source.data.len(),
196                });
197                if source.media_type.starts_with("image/") {
198                    part["modality"] = json!("image");
199                }
200                Some(part)
201            }
202            // Redacted-thinking blocks carry nothing to surface; this also
203            // omits unknown future `#[non_exhaustive]` block kinds from the
204            // observability payload.
205            _ => None,
206        }
207    }
208
209    /// Mask PII in a plain string.
210    fn mask_str(&self, text: &str) -> String {
211        let spans = self.detector.detect(text);
212        if spans.is_empty() {
213            text.to_owned()
214        } else {
215            mask_spans(text, &spans)
216        }
217    }
218
219    /// Recursively mask every string leaf within a JSON value.
220    fn mask_json(&self, value: &Value) -> Value {
221        match value {
222            Value::String(s) => Value::String(self.mask_str(s)),
223            Value::Array(arr) => Value::Array(arr.iter().map(|v| self.mask_json(v)).collect()),
224            Value::Object(map) => Value::Object(
225                map.iter()
226                    .map(|(k, v)| (k.clone(), self.mask_json(v)))
227                    .collect(),
228            ),
229            Value::Null | Value::Bool(_) | Value::Number(_) => value.clone(),
230        }
231    }
232}
233
234impl Default for PayloadRedactor {
235    fn default() -> Self {
236        Self::noop()
237    }
238}
239
240/// Decide whether a User-role message is actually a `tool` message
241/// (SDK batches tool results as User messages).
242fn determine_user_message_role(message: &Message) -> &'static str {
243    match &message.content {
244        Content::Blocks(blocks) => {
245            let has_tool_result = blocks
246                .iter()
247                .any(|b| matches!(b, ContentBlock::ToolResult { .. }));
248            if has_tool_result { "tool" } else { "user" }
249        }
250        Content::Text(_) => "user",
251    }
252}
253
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::{ChatRequest, ChatResponse, ContentSource, StopReason, Usage};
    use agent_sdk_foundation::privacy::BaselineDetector;

    /// Build a request carrying only a system prompt and messages; every
    /// other field is left at an inert value.
    fn empty_request(system: &str, messages: Vec<Message>) -> ChatRequest {
        ChatRequest {
            system: system.to_owned(),
            messages,
            tools: None,
            max_tokens: 1024,
            max_tokens_explicit: false,
            session_id: None,
            cached_content: None,
            thinking: None,
            tool_choice: None,
            response_format: None,
            cache: None,
        }
    }

    /// Build a message whose content is the given list of blocks.
    fn blocks_message(role: Role, blocks: Vec<ContentBlock>) -> Message {
        Message {
            role,
            content: Content::Blocks(blocks),
        }
    }

    /// Build a response with the given content and stop reason; the id,
    /// model and usage figures are irrelevant to payload conversion.
    fn response_with(content: Vec<ContentBlock>, stop_reason: StopReason) -> ChatResponse {
        ChatResponse {
            id: "resp_1".to_string(),
            content,
            model: "test-model".to_string(),
            stop_reason: Some(stop_reason),
            usage: Usage {
                input_tokens: 10,
                output_tokens: 5,
                cached_input_tokens: 0,
                cache_creation_input_tokens: 0,
            },
        }
    }

    // ── Noop redactor preserves the raw conversion shape ──

    #[test]
    fn empty_system_returns_none() {
        let request = empty_request("", vec![]);
        assert!(
            PayloadRedactor::noop()
                .convert_system_instructions(&request)
                .is_none()
        );
    }

    #[test]
    fn system_instructions_wraps_in_text_array() -> anyhow::Result<()> {
        use anyhow::Context as _;
        let request = empty_request("You are helpful.", vec![]);
        let result = PayloadRedactor::noop()
            .convert_system_instructions(&request)
            .context("should be Some")?;
        assert_eq!(result, json!([{"text": "You are helpful."}]));
        Ok(())
    }

    #[test]
    fn user_text_message_converts_correctly() {
        let msg = Message::user("Hello");
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["role"], "user");
        assert_eq!(result["content"][0]["text"], "Hello");
    }

    #[test]
    fn assistant_text_message_converts_correctly() {
        let msg = Message::assistant("Hi there");
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["role"], "assistant");
        assert_eq!(result["content"][0]["text"], "Hi there");
    }

    #[test]
    fn tool_result_batch_maps_to_tool_role() {
        let msg = blocks_message(
            Role::User,
            vec![ContentBlock::ToolResult {
                tool_use_id: "call_1".to_string(),
                content: "result data".to_string(),
                is_error: None,
            }],
        );
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["role"], "tool");
        assert_eq!(result["content"][0]["type"], "tool_call_response");
        assert_eq!(result["content"][0]["id"], "call_1");
        assert_eq!(result["content"][0]["output"], "result data");
        // A non-error result must not carry the flag at all.
        assert!(result["content"][0].get("is_error").is_none());
    }

    #[test]
    fn tool_result_with_image_attachment_stays_in_tool_message() {
        let msg = blocks_message(
            Role::User,
            vec![
                ContentBlock::ToolResult {
                    tool_use_id: "call_1".to_string(),
                    content: "screenshot taken".to_string(),
                    is_error: None,
                },
                ContentBlock::Image {
                    source: ContentSource::new("image/png", "aWdv"),
                },
            ],
        );
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["role"], "tool");
        assert_eq!(result["content"][0]["type"], "tool_call_response");
        assert_eq!(result["content"][1]["type"], "blob");
        assert_eq!(result["content"][1]["modality"], "image");
    }

    #[test]
    fn thinking_block_maps_to_reasoning_part() {
        let msg = blocks_message(
            Role::Assistant,
            vec![
                ContentBlock::Thinking {
                    thinking: "Let me think...".to_string(),
                    signature: None,
                },
                ContentBlock::Text {
                    text: "The answer is 42".to_string(),
                },
            ],
        );
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["content"][0]["type"], "reasoning");
        assert_eq!(result["content"][0]["text"], "Let me think...");
        assert_eq!(result["content"][1]["text"], "The answer is 42");
    }

    #[test]
    fn redacted_thinking_is_omitted() {
        let msg = blocks_message(
            Role::Assistant,
            vec![
                ContentBlock::RedactedThinking {
                    data: "secret".to_string(),
                },
                ContentBlock::Text {
                    text: "visible".to_string(),
                },
            ],
        );
        let result = PayloadRedactor::noop().convert_message(&msg);
        let content = result["content"].as_array().expect("array");
        assert_eq!(content.len(), 1);
        assert_eq!(content[0]["text"], "visible");
    }

    #[test]
    fn tool_use_block_maps_to_tool_call_part() {
        let msg = blocks_message(
            Role::Assistant,
            vec![ContentBlock::ToolUse {
                id: "call_1".to_string(),
                name: "read".to_string(),
                input: json!({"path": "/tmp/test.rs"}),
                thought_signature: None,
            }],
        );
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["content"][0]["type"], "tool_call");
        assert_eq!(result["content"][0]["id"], "call_1");
        assert_eq!(result["content"][0]["name"], "read");
        // Arguments travel as a JSON-encoded string; with the noop redactor
        // they must round-trip to the original input.
        let args = result["content"][0]["arguments"]
            .as_str()
            .expect("arguments string");
        let parsed: Value = serde_json::from_str(args).expect("arguments are valid JSON");
        assert_eq!(parsed, json!({"path": "/tmp/test.rs"}));
    }

    #[test]
    fn document_block_maps_to_blob_part() {
        let msg = blocks_message(
            Role::User,
            vec![ContentBlock::Document {
                source: ContentSource::new("application/pdf", "cGRm"),
            }],
        );
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["content"][0]["type"], "blob");
        assert_eq!(result["content"][0]["mime_type"], "application/pdf");
        assert_eq!(result["content"][0]["size"], 4);
    }

    #[test]
    fn output_messages_includes_finish_reason() {
        let response = response_with(
            vec![ContentBlock::Text {
                text: "Done".to_string(),
            }],
            StopReason::EndTurn,
        );
        let result = PayloadRedactor::noop().convert_output_messages(&response);
        let msg = &result[0];
        assert_eq!(msg["role"], "assistant");
        assert_eq!(msg["finish_reason"], "stop");
        assert_eq!(msg["content"][0]["text"], "Done");
    }

    #[test]
    fn output_messages_tool_call_finish_reason() {
        let response = response_with(
            vec![ContentBlock::ToolUse {
                id: "c1".to_string(),
                name: "bash".to_string(),
                input: json!({"command": "ls"}),
                thought_signature: None,
            }],
            StopReason::ToolUse,
        );
        let result = PayloadRedactor::noop().convert_output_messages(&response);
        assert_eq!(result[0]["finish_reason"], "tool_call");
    }

    #[test]
    fn tool_result_error_flag_is_preserved() {
        let msg = blocks_message(
            Role::User,
            vec![ContentBlock::ToolResult {
                tool_use_id: "call_1".to_string(),
                content: "failed".to_string(),
                is_error: Some(true),
            }],
        );
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["content"][0]["is_error"], true);
    }

    #[test]
    fn input_messages_preserves_order() {
        let request = empty_request(
            "",
            vec![
                Message::user("first"),
                Message::assistant("second"),
                Message::user("third"),
            ],
        );
        let result = PayloadRedactor::noop().convert_input_messages(&request);
        let arr = result.as_array().expect("array");
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0]["role"], "user");
        assert_eq!(arr[1]["role"], "assistant");
        assert_eq!(arr[2]["role"], "user");
    }

    // ── Baseline redactor masks PII across every covered surface ──

    fn baseline_redactor() -> PayloadRedactor {
        PayloadRedactor::new(Arc::new(
            BaselineDetector::new().expect("baseline compiles"),
        ))
    }

    #[test]
    fn redacts_email_in_system_prompt() {
        let request = empty_request("Contact support at ops@example.com.", vec![]);
        let result = baseline_redactor()
            .convert_system_instructions(&request)
            .expect("some");
        let text = result[0]["text"].as_str().expect("text string");
        assert!(
            text.contains("[REDACTED:email]"),
            "system prompt not redacted: {text}"
        );
        assert!(!text.contains("ops@example.com"));
    }

    #[test]
    fn redacts_cpf_in_user_text() {
        let request = empty_request("", vec![Message::user("meu CPF é 111.444.777-35")]);
        let result = baseline_redactor().convert_input_messages(&request);
        let text = result[0]["content"][0]["text"].as_str().expect("text");
        assert!(text.contains("[REDACTED:cpf]"), "user text: {text}");
        assert!(!text.contains("111.444.777-35"));
    }

    #[test]
    fn redacts_pan_in_tool_result_output() {
        let msg = blocks_message(
            Role::User,
            vec![ContentBlock::ToolResult {
                tool_use_id: "c1".to_string(),
                content: "charged card 4111 1111 1111 1111 successfully".to_string(),
                is_error: None,
            }],
        );
        let result = baseline_redactor().convert_message(&msg);
        let output = result["content"][0]["output"].as_str().expect("output");
        assert!(
            output.contains("[REDACTED:credit_card]"),
            "tool output: {output}"
        );
        assert!(!output.contains("4111 1111 1111 1111"));
    }

    #[test]
    fn redacts_strings_inside_tool_call_arguments_json() {
        let msg = blocks_message(
            Role::Assistant,
            vec![ContentBlock::ToolUse {
                id: "c1".to_string(),
                name: "send_pix".to_string(),
                input: json!({
                    "chave_pix": "ana@example.com",
                    "amount_brl": 100,
                    "metadata": {
                        "recipient_cpf": "111.444.777-35",
                        "note": "salário"
                    }
                }),
                thought_signature: None,
            }],
        );
        let result = baseline_redactor().convert_message(&msg);
        let args = result["content"][0]["arguments"].as_str().expect("args");
        assert!(args.contains("[REDACTED:email]"), "args: {args}");
        assert!(args.contains("[REDACTED:cpf]"), "args: {args}");
        assert!(!args.contains("ana@example.com"));
        assert!(!args.contains("111.444.777-35"));
        // Masking must leave the arguments parseable and non-string leaves
        // untouched.
        let parsed: Value = serde_json::from_str(args).expect("masked args are valid JSON");
        assert_eq!(parsed["amount_brl"], json!(100));
    }

    #[test]
    fn redacts_secret_in_assistant_output() {
        let response = response_with(
            vec![ContentBlock::Text {
                text: "here is the key sk-abcdefghijklmnopqrstuv for ci".to_string(),
            }],
            StopReason::EndTurn,
        );
        let result = baseline_redactor().convert_output_messages(&response);
        let text = result[0]["content"][0]["text"].as_str().expect("text");
        assert!(text.contains("[REDACTED:secret]"), "output: {text}");
        assert!(!text.contains("sk-abcdefghijklmnopqrstuv"));
    }

    #[test]
    fn redacts_pii_in_thinking_text() {
        let msg = blocks_message(
            Role::Assistant,
            vec![ContentBlock::Thinking {
                thinking: "User CPF is 111.444.777-35 — I should confirm before sending."
                    .to_string(),
                signature: None,
            }],
        );
        let result = baseline_redactor().convert_message(&msg);
        let text = result["content"][0]["text"].as_str().expect("text");
        assert!(text.contains("[REDACTED:cpf]"), "thinking: {text}");
        // The marker alone is not enough: the raw value must be gone too.
        assert!(!text.contains("111.444.777-35"), "thinking: {text}");
    }

    #[test]
    fn mask_json_preserves_non_string_leaves() {
        let input = json!({
            "amount": 42.5,
            "active": true,
            "items": null,
            "email": "ana@example.com"
        });
        let redacted = baseline_redactor().mask_json(&input);
        assert_eq!(redacted["amount"], json!(42.5));
        assert_eq!(redacted["active"], json!(true));
        assert_eq!(redacted["items"], json!(null));
        assert!(
            redacted["email"]
                .as_str()
                .expect("email")
                .contains("[REDACTED:email]")
        );
    }

    #[test]
    fn is_noop_reflects_constructor() {
        assert!(PayloadRedactor::noop().is_noop());
        assert!(PayloadRedactor::default().is_noop());
        assert!(
            !baseline_redactor().is_noop(),
            "a detector-backed redactor must not report as noop"
        );
    }
}